diff --git a/eland/__init__.py b/eland/__init__.py
index eee58ae..bf309ea 100644
--- a/eland/__init__.py
+++ b/eland/__init__.py
@@ -1,7 +1,15 @@
+import os
+
+# Set the modin engine/backend to plain python/pandas to avoid starting ray or any other engine
+os.environ["MODIN_ENGINE"] = 'python'
+os.environ["MODIN_BACKEND"] = 'pandas'
+
from .client import *
-from .ndframe import *
from .index import *
from .mappings import *
+from .operations import *
+from .query_compiler import *
+from .ndframe import *
from .series import *
from .dataframe import *
from .utils import *
diff --git a/eland/client.py b/eland/client.py
index 3b1231f..5806481 100644
--- a/eland/client.py
+++ b/eland/client.py
@@ -1,37 +1,34 @@
from elasticsearch import Elasticsearch
from elasticsearch import helpers
-class Client():
+class Client:
"""
eland client - implemented as facade to control access to Elasticsearch methods
"""
def __init__(self, es=None):
if isinstance(es, Elasticsearch):
- self.es = es
+ self._es = es
elif isinstance(es, Client):
- self.es = es.es
+ self._es = es._es
else:
- self.es = Elasticsearch(es)
+ self._es = Elasticsearch(es)
- def info(self):
- return self.es.info()
-
- def indices(self):
- return self.es.indices
+    def get_mapping(self, **kwargs):
+        """
+        Fetch index mappings through the wrapped Elasticsearch client.
+
+        Parameters
+        ----------
+        **kwargs
+            Passed unchanged to ``self._es.indices.get_mapping``.
+
+        Returns
+        -------
+        Whatever ``self._es.indices.get_mapping`` returns.
+        """
+        indices_client = self._es.indices
+        return indices_client.get_mapping(**kwargs)
def bulk(self, actions, refresh=False):
- return helpers.bulk(self.es, actions, refresh=refresh)
+ return helpers.bulk(self._es, actions, refresh=refresh)
def scan(self, **kwargs):
- return helpers.scan(self.es, **kwargs)
+ return helpers.scan(self._es, **kwargs)
def search(self, **kwargs):
- return self.es.search(**kwargs)
+ return self._es.search(**kwargs)
def field_caps(self, **kwargs):
- return self.es.field_caps(**kwargs)
+ return self._es.field_caps(**kwargs)
def count(self, **kwargs):
- count_json = self.es.count(**kwargs)
+ count_json = self._es.count(**kwargs)
return count_json['count']
diff --git a/eland/dataframe.py b/eland/dataframe.py
index b4ed1c4..a7e45c4 100644
--- a/eland/dataframe.py
+++ b/eland/dataframe.py
@@ -1,394 +1,58 @@
-"""
-DataFrame
----------
-An efficient 2D container for potentially mixed-type time series or other
-labeled data series.
-
-The underlying data resides in Elasticsearch and the API aligns as much as
-possible with pandas.DataFrame API.
-
-This allows the eland.DataFrame to access large datasets stored in Elasticsearch,
-without storing the dataset in local memory.
-
-Implementation Details
-----------------------
-
-Elasticsearch indexes can be configured in many different ways, and these indexes
-utilise different data structures to pandas.DataFrame.
-
-eland.DataFrame operations that return individual rows (e.g. df.head()) return
-_source data. If _source is not enabled, this data is not accessible.
-
-Similarly, only Elasticsearch searchable fields can be searched or filtered, and
-only Elasticsearch aggregatable fields can be aggregated or grouped.
-
-"""
-import sys
+from eland import NDFrame
import pandas as pd
-from pandas.io.formats import format as fmt
-from pandas.io.formats.printing import pprint_thing
-from pandas.compat import StringIO
-from pandas.io.common import _expand_user, _stringify_path
-from pandas.io.formats import console
-from pandas.core import common as com
-
-from eland import NDFrame
-from eland import Index
-from eland import Series
-
-
-
-
class DataFrame(NDFrame):
- """
- pandas.DataFrame like API that proxies into Elasticsearch index(es).
-
- Parameters
- ----------
- client : eland.Client
- A reference to a Elasticsearch python client
-
- index_pattern : str
- An Elasticsearch index pattern. This can contain wildcards (e.g. filebeat-*).
-
- See Also
- --------
-
- Examples
- --------
-
- import eland as ed
- client = ed.Client(Elasticsearch())
- df = ed.DataFrame(client, 'reviews')
- df.head()
- reviewerId vendorId rating date
- 0 0 0 5 2006-04-07 17:08
- 1 1 1 5 2006-05-04 12:16
- 2 2 2 4 2006-04-21 12:26
- 3 3 3 5 2006-04-18 15:48
- 4 3 4 5 2006-04-18 15:49
-
- Notice that the types are based on Elasticsearch mappings
-
- Notes
- -----
- If the Elasticsearch index is deleted or index mappings are changed after this
- object is created, the object is not rebuilt and so inconsistencies can occur.
-
- """
-
+ # TODO create effectively 2 constructors
+ # 1. client, index_pattern, columns, index_field
+ # 2. query_compiler
def __init__(self,
- client,
- index_pattern,
- mappings=None,
- index_field=None):
+ client=None,
+ index_pattern=None,
+ columns=None,
+ index_field=None,
+ query_compiler=None):
# python 3 syntax
- super().__init__(client, index_pattern, mappings=mappings, index_field=index_field)
+ super().__init__(
+ client=client,
+ index_pattern=index_pattern,
+ columns=columns,
+ index_field=index_field,
+ query_compiler=query_compiler)
- def head(self, n=5):
- return super()._head(n)
+    def _get_columns(self):
+        """Return the ``columns`` attribute of the underlying query compiler."""
+        compiler = self._query_compiler
+        return compiler.columns
- def tail(self, n=5):
- return super()._tail(n)
-
- def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None,
- null_counts=None):
- """
- Print a concise summary of a DataFrame.
-
- This method prints information about a DataFrame including
- the index dtype and column dtypes, non-null values and memory usage.
-
- This copies a lot of code from pandas.DataFrame.info as it is difficult
- to split out the appropriate code or creating a SparseDataFrame gives
- incorrect results on types and counts.
- """
- if buf is None: # pragma: no cover
- buf = sys.stdout
-
- lines = []
-
- lines.append(str(type(self)))
- lines.append(self._index_summary())
-
- if len(self.columns) == 0:
- lines.append('Empty {name}'.format(name=type(self).__name__))
- fmt.buffer_put_lines(buf, lines)
- return
-
- cols = self.columns
-
- # hack
- if max_cols is None:
- max_cols = pd.get_option('display.max_info_columns',
- len(self.columns) + 1)
-
- max_rows = pd.get_option('display.max_info_rows', len(self) + 1)
-
- if null_counts is None:
- show_counts = ((len(self.columns) <= max_cols) and
- (len(self) < max_rows))
- else:
- show_counts = null_counts
- exceeds_info_cols = len(self.columns) > max_cols
-
- def _verbose_repr():
- lines.append('Data columns (total %d columns):' %
- len(self.columns))
- space = max(len(pprint_thing(k)) for k in self.columns) + 4
- counts = None
-
- tmpl = "{count}{dtype}"
- if show_counts:
- counts = self.count()
- if len(cols) != len(counts): # pragma: no cover
- raise AssertionError(
- 'Columns must equal counts '
- '({cols:d} != {counts:d})'.format(
- cols=len(cols), counts=len(counts)))
- tmpl = "{count} non-null {dtype}"
-
- dtypes = self.dtypes
- for i, col in enumerate(self.columns):
- dtype = dtypes.iloc[i]
- col = pprint_thing(col)
-
- count = ""
- if show_counts:
- count = counts.iloc[i]
-
- lines.append(_put_str(col, space) + tmpl.format(count=count,
- dtype=dtype))
-
- def _non_verbose_repr():
- lines.append(self.columns._summary(name='Columns'))
-
- def _sizeof_fmt(num, size_qualifier):
- # returns size in human readable format
- for x in ['bytes', 'KB', 'MB', 'GB', 'TB']:
- if num < 1024.0:
- return ("{num:3.1f}{size_q} "
- "{x}".format(num=num, size_q=size_qualifier, x=x))
- num /= 1024.0
- return "{num:3.1f}{size_q} {pb}".format(num=num,
- size_q=size_qualifier,
- pb='PB')
-
- if verbose:
- _verbose_repr()
- elif verbose is False: # specifically set to False, not nesc None
- _non_verbose_repr()
- else:
- if exceeds_info_cols:
- _non_verbose_repr()
- else:
- _verbose_repr()
-
- counts = self.get_dtype_counts()
- dtypes = ['{k}({kk:d})'.format(k=k[0], kk=k[1]) for k
- in sorted(counts.items())]
- lines.append('dtypes: {types}'.format(types=', '.join(dtypes)))
-
- if memory_usage is None:
- memory_usage = pd.get_option('display.memory_usage')
- if memory_usage:
- # append memory usage of df to display
- size_qualifier = ''
-
- # TODO - this is different from pd.DataFrame as we shouldn't
- # really hold much in memory. For now just approximate with getsizeof + ignore deep
- mem_usage = sys.getsizeof(self)
- lines.append("memory usage: {mem}\n".format(
- mem=_sizeof_fmt(mem_usage, size_qualifier)))
-
- fmt.buffer_put_lines(buf, lines)
+ columns = property(_get_columns)
@property
- def shape(self):
+ def empty(self):
+ """Determines if the DataFrame is empty.
+
+ Returns:
+ True if the DataFrame is empty.
+ False otherwise.
"""
- Return a tuple representing the dimensionality of the DataFrame.
+        # TODO - this is called on every attribute get (most methods) from modin/pandas/base.py:3337
+        # (as Index.__len__ performs a query) we may want to cache self.index.empty()
+ return len(self.columns) == 0 or len(self.index) == 0
- Returns
- -------
- shape: tuple
- 0 - number of rows
- 1 - number of columns
- """
- num_rows = len(self)
- num_columns = len(self.columns)
+    def head(self, n=5):
+        """Delegate to the parent class ``head`` with the requested row count ``n``."""
+        parent = super()
+        return parent.head(n)
- return num_rows, num_columns
+    def tail(self, n=5):
+        """Delegate to the parent class ``tail`` with the requested row count ``n``."""
+        parent = super()
+        return parent.tail(n)
- def set_index(self, index_field):
- copy = self.copy()
- copy._index = Index(index_field)
- return copy
-
- def _index_summary(self):
- head = self.head(1).index[0]
- tail = self.tail(1).index[0]
- index_summary = ', %s to %s' % (pprint_thing(head),
- pprint_thing(tail))
-
- name = "Index"
- return '%s: %s entries%s' % (name, len(self), index_summary)
-
- def count(self):
- """
- Count non-NA cells for each column (TODO row)
-
- Counts are based on exists queries against ES
-
- This is inefficient, as it creates N queries (N is number of fields).
-
- An alternative approach is to use value_count aggregations. However, they have issues in that:
- 1. They can only be used with aggregatable fields (e.g. keyword not text)
- 2. For list fields they return multiple counts. E.g. tags=['elastic', 'ml'] returns value_count=2
- for a single document.
- """
- counts = {}
- for field in self._mappings.source_fields():
- exists_query = {"query": {"exists": {"field": field}}}
- field_exists_count = self._client.count(index=self._index_pattern, body=exists_query)
- counts[field] = field_exists_count
-
- count = pd.Series(data=counts, index=self._mappings.source_fields())
-
- return count
-
- def describe(self):
- return super()._describe()
-
-
- def __getitem__(self, key):
- # NOTE: there is a difference between pandas here.
- # e.g. df['a'] returns pd.Series, df[['a','b']] return pd.DataFrame
-
- # Implementation mainly copied from pandas v0.24.2
- # (https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html)
- key = com.apply_if_callable(key, self)
-
- # TODO - add slice capabilities - need to add index features first
- # e.g. set index etc.
- # Do we have a slicer (on rows)?
- """
- indexer = convert_to_index_sliceable(self, key)
- if indexer is not None:
- return self._slice(indexer, axis=0)
- # Do we have a (boolean) DataFrame?
- if isinstance(key, DataFrame):
- return self._getitem_frame(key)
- """
-
- # Do we have a (boolean) 1d indexer?
- """
- if com.is_bool_indexer(key):
- return self._getitem_bool_array(key)
- """
-
- # We are left with two options: a single key, and a collection of keys,
- columns = []
- is_single_key = False
- if isinstance(key, str):
- if not self._mappings.is_source_field(key):
- raise TypeError('Column does not exist: [{0}]'.format(key))
- columns.append(key)
- is_single_key = True
- elif isinstance(key, list):
- columns.extend(key)
- else:
- raise TypeError('__getitem__ arguments invalid: [{0}]'.format(key))
-
- mappings = self._filter_mappings(columns)
-
- # Return new eland.DataFrame with modified mappings
- if is_single_key:
- return Series(self._client, self._index_pattern, mappings=mappings)
- else:
- return DataFrame(self._client, self._index_pattern, mappings=mappings)
-
-
- def __getattr__(self, name):
- # Note: obj.x will always call obj.__getattribute__('x') prior to
- # calling obj.__getattr__('x').
- mappings = self._filter_mappings([name])
-
- return Series(self._client, self._index_pattern, mappings=mappings)
-
- def copy(self):
- # TODO - test and validate...may need deep copying
- return DataFrame(self._client,
- self._index_pattern,
- self._mappings,
- self._index)
-
- # ----------------------------------------------------------------------
- # Rendering Methods
def __repr__(self):
- """
- From pandas
- """
- buf = StringIO()
+ num_rows = pd.get_option("max_rows") or 60
+ num_cols = pd.get_option("max_columns") or 20
- max_rows = pd.get_option("display.max_rows")
- max_cols = pd.get_option("display.max_columns")
- show_dimensions = pd.get_option("display.show_dimensions")
- if pd.get_option("display.expand_frame_repr"):
- width, _ = console.get_console_size()
+ result = repr(self._build_repr_df(num_rows, num_cols))
+ if len(self.index) > num_rows or len(self.columns) > num_cols:
+ # The split here is so that we don't repr pandas row lengths.
+ return result.rsplit("\n\n", 1)[0] + "\n\n[{0} rows x {1} columns]".format(
+ len(self.index), len(self.columns)
+ )
else:
- width = None
- self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols,
- line_width=width, show_dimensions=show_dimensions)
-
- return buf.getvalue()
-
- def to_string(self, buf=None, columns=None, col_space=None, header=True,
- index=True, na_rep='NaN', formatters=None, float_format=None,
- sparsify=None, index_names=True, justify=None,
- max_rows=None, max_cols=None, show_dimensions=True,
- decimal='.', line_width=None):
- """
- From pandas
- """
- if max_rows == None:
- max_rows = pd.get_option('display.max_rows')
-
- df = self._fake_head_tail_df(max_rows=max_rows+1)
-
- if buf is not None:
- _buf = _expand_user(_stringify_path(buf))
- else:
- _buf = StringIO()
-
- df.to_string(buf=_buf, columns=columns,
- col_space=col_space, na_rep=na_rep,
- formatters=formatters,
- float_format=float_format,
- sparsify=sparsify, justify=justify,
- index_names=index_names,
- header=header, index=index,
- max_rows=max_rows,
- max_cols=max_cols,
- show_dimensions=False, # print this outside of this call
- decimal=decimal,
- line_width=line_width)
-
- # Our fake dataframe has incorrect number of rows (max_rows*2+1) - write out
- # the correct number of rows
- if show_dimensions:
- _buf.write("\n\n[{nrows} rows x {ncols} columns]"
- .format(nrows=self._index_count(), ncols=len(self.columns)))
-
- if buf is None:
- result = _buf.getvalue()
return result
-
- def to_pandas(selfs):
- return super()._to_pandas()
-
-# From pandas.DataFrame
-def _put_str(s, space):
- return '{s}'.format(s=s)[:space].ljust(space)
diff --git a/eland/index.py b/eland/index.py
index be0425f..eb13ce6 100644
--- a/eland/index.py
+++ b/eland/index.py
@@ -18,10 +18,12 @@ class Index:
ID_INDEX_FIELD = '_id'
ID_SORT_FIELD = '_doc' # if index field is _id, sort by _doc
- def __init__(self, index_field=None):
+ def __init__(self, query_compiler, index_field=None):
# Calls setter
self.index_field = index_field
+ self._query_compiler = query_compiler
+
@property
def sort_field(self):
if self._index_field == self.ID_INDEX_FIELD:
@@ -38,9 +40,12 @@ class Index:
@index_field.setter
def index_field(self, index_field):
- if index_field == None:
+ if index_field == None or index_field == Index.ID_INDEX_FIELD:
self._index_field = Index.ID_INDEX_FIELD
self._is_source_field = False
else:
self._index_field = index_field
self._is_source_field = True
+
+    def __len__(self):
+        """Return the value of the query compiler's ``_index_count()``."""
+        compiler = self._query_compiler
+        return compiler._index_count()
diff --git a/eland/mappings.py b/eland/mappings.py
index 1dc0bee..76d7286 100644
--- a/eland/mappings.py
+++ b/eland/mappings.py
@@ -4,7 +4,7 @@ import pandas as pd
from pandas.core.dtypes.common import (is_float_dtype, is_bool_dtype, is_integer_dtype, is_datetime_or_timedelta_dtype, is_string_dtype)
-class Mappings():
+class Mappings:
"""
General purpose to manage Elasticsearch to/from pandas mappings
@@ -53,7 +53,7 @@ class Mappings():
Columns to copy
"""
if (client is not None) and (index_pattern is not None):
- get_mapping = client.indices().get_mapping(index=index_pattern)
+ get_mapping = client.get_mapping(index=index_pattern)
# Get all fields (including all nested) and then field_caps
# for these names (fields=* doesn't appear to work effectively...)
@@ -67,12 +67,8 @@ class Mappings():
# field_name, es_dtype, pd_dtype, is_searchable, is_aggregtable, is_source
self._mappings_capabilities = Mappings._create_capability_matrix(all_fields, source_fields, all_fields_caps)
else:
- if columns is not None:
- # Reference object and restrict mapping columns
- self._mappings_capabilities = mappings._mappings_capabilities.loc[columns]
- else:
- # straight copy
- self._mappings_capabilities = mappings._mappings_capabilities.copy()
+ # straight copy
+ self._mappings_capabilities = mappings._mappings_capabilities.copy()
# Cache source field types for efficient lookup
# (this massively improves performance of DataFrame.flatten)
diff --git a/eland/ndframe.py b/eland/ndframe.py
index 9fbd312..c1b7f5b 100644
--- a/eland/ndframe.py
+++ b/eland/ndframe.py
@@ -22,350 +22,55 @@ Similarly, only Elasticsearch searchable fields can be searched or filtered, and
only Elasticsearch aggregatable fields can be aggregated or grouped.
"""
-import pandas as pd
-import functools
-from elasticsearch_dsl import Search
-import eland as ed
+from modin.pandas.base import BasePandasDataset
-from pandas.core.generic import NDFrame as pd_NDFrame
-from pandas._libs import Timestamp, iNaT, properties
+from eland import ElandQueryCompiler
-class NDFrame():
- """
- pandas.DataFrame/Series like API that proxies into Elasticsearch index(es).
+class NDFrame(BasePandasDataset):
- Parameters
- ----------
- client : eland.Client
- A reference to a Elasticsearch python client
-
- index_pattern : str
- An Elasticsearch index pattern. This can contain wildcards (e.g. filebeat-*).
-
- See Also
- --------
-
- """
def __init__(self,
- client,
- index_pattern,
- mappings=None,
- index_field=None):
-
- self._client = ed.Client(client)
- self._index_pattern = index_pattern
-
- # Get and persist mappings, this allows us to correctly
- # map returned types from Elasticsearch to pandas datatypes
- if mappings is None:
- self._mappings = ed.Mappings(self._client, self._index_pattern)
- else:
- self._mappings = mappings
-
- self._index = ed.Index(index_field)
-
- def _es_results_to_pandas(self, results):
+ client=None,
+ index_pattern=None,
+ columns=None,
+ index_field=None,
+ query_compiler=None):
"""
+ pandas.DataFrame/Series like API that proxies into Elasticsearch index(es).
+
Parameters
----------
- results: dict
- Elasticsearch results from self.client.search
-
- Returns
- -------
- df: pandas.DataFrame
- _source values extracted from results and mapped to pandas DataFrame
- dtypes are mapped via Mapping object
-
- Notes
- -----
- Fields containing lists in Elasticsearch don't map easily to pandas.DataFrame
- For example, an index with mapping:
- ```
- "mappings" : {
- "properties" : {
- "group" : {
- "type" : "keyword"
- },
- "user" : {
- "type" : "nested",
- "properties" : {
- "first" : {
- "type" : "keyword"
- },
- "last" : {
- "type" : "keyword"
- }
- }
- }
- }
- }
- ```
- Adding a document:
- ```
- "_source" : {
- "group" : "amsterdam",
- "user" : [
- {
- "first" : "John",
- "last" : "Smith"
- },
- {
- "first" : "Alice",
- "last" : "White"
- }
- ]
- }
- ```
- (https://www.elastic.co/guide/en/elasticsearch/reference/current/nested.html)
- this would be transformed internally (in Elasticsearch) into a document that looks more like this:
- ```
- {
- "group" : "amsterdam",
- "user.first" : [ "alice", "john" ],
- "user.last" : [ "smith", "white" ]
- }
- ```
- When mapping this a pandas data frame we mimic this transformation.
-
- Similarly, if a list is added to Elasticsearch:
- ```
- PUT my_index/_doc/1
- {
- "list" : [
- 0, 1, 2
- ]
- }
- ```
- The mapping is:
- ```
- "mappings" : {
- "properties" : {
- "user" : {
- "type" : "long"
- }
- }
- }
- ```
- TODO - explain how lists are handled (https://www.elastic.co/guide/en/elasticsearch/reference/current/array.html)
- TODO - an option here is to use Elasticsearch's multi-field matching instead of pandas treatment of lists (which isn't great)
- NOTE - using this lists is generally not a good way to use this API
+ client : eland.Client
+ A reference to a Elasticsearch python client
"""
- def flatten_dict(y):
- out = {}
+ if query_compiler is None:
+ query_compiler = ElandQueryCompiler(client=client,
+ index_pattern=index_pattern,
+ columns=columns,
+ index_field=index_field)
+ self._query_compiler = query_compiler
- def flatten(x, name=''):
- # We flatten into source fields e.g. if type=geo_point
- # location: {lat=52.38, lon=4.90}
- if name == '':
- is_source_field = False
- pd_dtype = 'object'
- else:
- is_source_field, pd_dtype = self._mappings.source_field_pd_dtype(name[:-1])
+    def _get_index(self):
+        """Return the ``index`` attribute of the underlying query compiler."""
+        compiler = self._query_compiler
+        return compiler.index
- if not is_source_field and type(x) is dict:
- for a in x:
- flatten(x[a], name + a + '.')
- elif not is_source_field and type(x) is list:
- for a in x:
- flatten(a, name)
- elif is_source_field == True: # only print source fields from mappings (TODO - not so efficient for large number of fields and filtered mapping)
- field_name = name[:-1]
+ index = property(_get_index)
- # Coerce types - for now just datetime
- if pd_dtype == 'datetime64[ns]':
- x = pd.to_datetime(x)
+ def _build_repr_df(self, num_rows, num_cols):
+        # Overridden version of BasePandasDataset._build_repr_df
+ # to avoid issues with concat
+ if len(self.index) <= num_rows:
+ return self.to_pandas()
- # Elasticsearch can have multiple values for a field. These are represented as lists, so
- # create lists for this pivot (see notes above)
- if field_name in out:
- if type(out[field_name]) is not list:
- l = [out[field_name]]
- out[field_name] = l
- out[field_name].append(x)
- else:
- out[field_name] = x
+ num_rows = num_rows + 1
- flatten(y)
+ head_rows = int(num_rows / 2) + num_rows % 2
+ tail_rows = num_rows - head_rows
- return out
-
- rows = []
- index = []
- if isinstance(results, dict):
- iterator = results['hits']['hits']
- else:
- iterator = results
-
- for hit in iterator:
- row = hit['_source']
-
- # get index value - can be _id or can be field value in source
- if self._index.is_source_field:
- index_field = row[self._index.index_field]
- else:
- index_field = hit[self._index.index_field]
- index.append(index_field)
-
- # flatten row to map correctly to 2D DataFrame
- rows.append(flatten_dict(row))
-
- # Create pandas DataFrame
- df = pd.DataFrame(data=rows, index=index)
-
- # _source may not contain all columns in the mapping
- # therefore, fill in missing columns
- # (note this returns self.columns NOT IN df.columns)
- missing_columns = list(set(self._columns) - set(df.columns))
-
- for missing in missing_columns:
- is_source_field, pd_dtype = self._mappings.source_field_pd_dtype(missing)
- df[missing] = None
- df[missing].astype(pd_dtype)
-
- # Sort columns in mapping order
- df = df[self._columns]
-
- return df
-
- def _head(self, n=5):
- """
- Protected method that returns head as pandas.DataFrame.
-
- Returns
- -------
- _head
- pandas.DataFrame of top N values
- """
- sort_params = self._index.sort_field + ":asc"
-
- results = self._client.search(index=self._index_pattern, size=n, sort=sort_params)
-
- return self._es_results_to_pandas(results)
-
- def _tail(self, n=5):
- """
- Protected method that returns tail as pandas.DataFrame.
-
- Returns
- -------
- _tail
- pandas.DataFrame of last N values
- """
- sort_params = self._index.sort_field + ":desc"
-
- results = self._client.search(index=self._index_pattern, size=n, sort=sort_params)
-
- df = self._es_results_to_pandas(results)
-
- # reverse order (index ascending)
- return df.sort_index()
-
- def _to_pandas(self):
- """
- Protected method that returns all data as pandas.DataFrame.
-
- Returns
- -------
- df
- pandas.DataFrame of all values
- """
- sort_params = self._index.sort_field + ":asc"
-
- results = self._client.scan(index=self._index_pattern)
-
- # We sort here rather than in scan - once everything is in core this
- # should be faster
- return self._es_results_to_pandas(results)
-
- def _describe(self):
- numeric_source_fields = self._mappings.numeric_source_fields()
-
- # for each field we compute:
- # count, mean, std, min, 25%, 50%, 75%, max
- search = Search(using=self._client, index=self._index_pattern).extra(size=0)
-
- for field in numeric_source_fields:
- search.aggs.metric('extended_stats_' + field, 'extended_stats', field=field)
- search.aggs.metric('percentiles_' + field, 'percentiles', field=field)
-
- response = search.execute()
-
- results = {}
-
- for field in numeric_source_fields:
- values = list()
- values.append(response.aggregations['extended_stats_' + field]['count'])
- values.append(response.aggregations['extended_stats_' + field]['avg'])
- values.append(response.aggregations['extended_stats_' + field]['std_deviation'])
- values.append(response.aggregations['extended_stats_' + field]['min'])
- values.append(response.aggregations['percentiles_' + field]['values']['25.0'])
- values.append(response.aggregations['percentiles_' + field]['values']['50.0'])
- values.append(response.aggregations['percentiles_' + field]['values']['75.0'])
- values.append(response.aggregations['extended_stats_' + field]['max'])
-
- # if not None
- if values.count(None) < len(values):
- results[field] = values
-
- df = pd.DataFrame(data=results, index=['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'])
-
- return df
-
- def _filter_mappings(self, columns):
- mappings = ed.Mappings(mappings=self._mappings, columns=columns)
-
- return mappings
-
- @property
- def columns(self):
- return self._columns
-
- @property
- def index(self):
- return self._index
-
- @property
- def dtypes(self):
- return self._mappings.dtypes()
-
- @property
- def _columns(self):
- return pd.Index(self._mappings.source_fields())
-
- def get_dtype_counts(self):
- return self._mappings.get_dtype_counts()
-
- def _index_count(self):
- """
- Returns
- -------
- index_count: int
- Count of docs where index_field exists
- """
- exists_query = {"query": {"exists": {"field": self._index.index_field}}}
-
- index_count = self._client.count(index=self._index_pattern, body=exists_query)
-
- return index_count
-
- def __len__(self):
- """
- Returns length of info axis, but here we use the index.
- """
- return self._client.count(index=self._index_pattern)
-
- def _fake_head_tail_df(self, max_rows=1):
- """
- Create a 'fake' pd.DataFrame of the entire ed.DataFrame
- by concat head and tail. Used for display.
- """
- head_rows = int(max_rows / 2) + max_rows % 2
- tail_rows = max_rows - head_rows
-
- head = self._head(head_rows)
- tail = self._tail(tail_rows)
+ head = self.head(head_rows).to_pandas()
+ tail = self.tail(tail_rows).to_pandas()
return head.append(tail)
+
+    def to_pandas(self):
+        """Return the result of the query compiler's ``to_pandas()``."""
+        compiler = self._query_compiler
+        return compiler.to_pandas()
diff --git a/eland/operations.py b/eland/operations.py
new file mode 100644
index 0000000..8705c4d
--- /dev/null
+++ b/eland/operations.py
@@ -0,0 +1,232 @@
+from enum import Enum
+
+
+class Operations:
+ """
+ A collector of the queries and selectors we apply to queries to return the appropriate results.
+
+ For example,
+ - a list of the columns in the DataFrame (a subset of columns in the index)
+ - a size limit on the results (e.g. for head(n=5))
+ - a query to filter the results (e.g. df.A > 10)
+
+ This is maintained as a 'task graph' (inspired by dask)
+
+ A task graph is a dictionary mapping keys to computations:
+
+ A key is any hashable value that is not a task:
+ ```
+ {'x': 1,
+ 'y': 2,
+ 'z': (add, 'x', 'y'),
+ 'w': (sum, ['x', 'y', 'z']),
+ 'v': [(sum, ['w', 'z']), 2]}
+ ```
+ (see https://docs.dask.org/en/latest/spec.html)
+ """
+
+    class SortOrder(Enum):
+        """Sort direction of a query, with helpers to flip it and to render it as a sort suffix."""
+        ASC = 0
+        DESC = 1
+
+        @staticmethod
+        def reverse(order):
+            """Return ``DESC`` when ``order`` is ``ASC``; return ``ASC`` for any other value."""
+            is_ascending = order == Operations.SortOrder.ASC
+            return Operations.SortOrder.DESC if is_ascending else Operations.SortOrder.ASC
+
+        @staticmethod
+        def to_string(order):
+            """Return ``":asc"`` when ``order`` is ``ASC``; return ``":desc"`` for any other value."""
+            return ":asc" if order == Operations.SortOrder.ASC else ":desc"
+
+    def __init__(self, tasks=None):
+        """
+        Create a task collector.
+
+        Parameters
+        ----------
+        tasks : list, optional
+            Existing task list to adopt. The list object is stored as-is (it is not copied).
+            When omitted, a new empty list is created.
+        """
+        # Identity check: `== None` would call a custom __eq__ on the argument
+        if tasks is None:
+            self._tasks = []
+        else:
+            self._tasks = tasks
+
+    def __constructor__(self, *args, **kwargs):
+        """Build a new object of this instance's own class from the given arguments."""
+        cls = type(self)
+        return cls(*args, **kwargs)
+
+    def copy(self):
+        """Return a new instance holding a shallow copy of this instance's task list."""
+        duplicated_tasks = self._tasks.copy()
+        return self.__constructor__(tasks=duplicated_tasks)
+
+    def head(self, index, n):
+        """
+        Append a 'head' task to the task list.
+
+        Parameters
+        ----------
+        index
+            Object whose ``sort_field`` attribute names the field to sort on.
+        n
+            Requested number of rows.
+        """
+        # Resolved later as an ascending sort with size=n
+        self._tasks.append(('head', (index.sort_field, n)))
+
+    def tail(self, index, n):
+        """
+        Append a 'tail' task to the task list.
+
+        Parameters
+        ----------
+        index
+            Object whose ``sort_field`` attribute names the field to sort on.
+        n
+            Requested number of rows.
+        """
+        # Resolved later as a descending sort with size=n
+        self._tasks.append(('tail', (index.sort_field, n)))
+
+    def set_columns(self, columns):
+        """
+        Append a 'columns' task recording a column selection.
+
+        Parameters
+        ----------
+        columns
+            The column selection to record; stored unchanged in the task.
+
+        Notes
+        -----
+        ``_to_es_query`` only resolves 'head' and 'tail' tasks, so a 'columns'
+        task is recorded here but is not yet applied to queries.
+        """
+        # _tasks is a list of (name, args) tuples, so the selection is appended
+        # as a task; a list cannot be indexed with the string key 'columns'.
+        self._tasks.append(('columns', columns))
+
+    def __repr__(self):
+        """Return the ``repr`` of the underlying task list."""
+        tasks = self._tasks
+        return repr(tasks)
+
+    def to_pandas(self, query_compiler):
+        """
+        Resolve the recorded tasks, run one search and return the post-processed result.
+
+        Parameters
+        ----------
+        query_compiler
+            Object providing ``_client.search``, ``_index_pattern`` and
+            ``_es_results_to_pandas``.
+
+        Returns
+        -------
+        The frame built by ``query_compiler._es_results_to_pandas`` after the
+        remaining post-processing actions have been applied to it.
+        """
+        es_query, pending_actions = self._to_es_query()
+        query_size, query_sort = Operations._query_to_params(es_query)
+
+        client = query_compiler._client
+        hits = client.search(index=query_compiler._index_pattern,
+                             size=query_size,
+                             sort=query_sort)
+
+        frame = query_compiler._es_results_to_pandas(hits)
+        return self._apply_df_post_processing(frame, pending_actions)
+
+    def to_count(self, query_compiler):
+        """
+        Return the row count implied by the recorded tasks.
+
+        When the resolved query or the post-processing actions carry a size
+        limit, the smallest limit is returned without contacting Elasticsearch.
+        Otherwise documents in which the index field exists are counted.
+
+        NOTE(review): a limit is returned as-is, so it may exceed the number of
+        documents actually present - confirm this is intended.
+
+        Parameters
+        ----------
+        query_compiler
+            Object providing ``index.index_field``, ``_client.count`` and
+            ``_index_pattern``.
+        """
+        es_query, pending_actions = self._to_es_query()
+
+        # Either limit can be None; keep only the ones that are set
+        candidate_limits = (es_query['query_size'],
+                            self._count_post_processing(pending_actions))
+        limits = [limit for limit in candidate_limits if limit is not None]
+
+        # Size is dictated by operations
+        if limits:
+            return min(limits)
+
+        index_field = query_compiler.index.index_field
+        exists_query = {"query": {"exists": {"field": index_field}}}
+
+        return query_compiler._client.count(index=query_compiler._index_pattern, body=exists_query)
+
+    @staticmethod
+    def _query_to_params(query):
+        """
+        Turn a resolved query dict into search parameters.
+
+        Parameters
+        ----------
+        query : dict
+            Dict with the keys 'query_sort_field', 'query_sort_order' and 'query_size'.
+
+        Returns
+        -------
+        size
+            The value of ``query['query_size']`` (can be None).
+        sort_params
+            The sort field followed by ':asc' or ':desc', or None when the sort
+            field or the sort order is not set.
+        """
+        sort_params = None
+        if query['query_sort_field'] and query['query_sort_order']:
+            sort_params = query['query_sort_field'] + Operations.SortOrder.to_string(query['query_sort_order'])
+
+        size = query['query_size']
+
+        return size, sort_params
+
+    @staticmethod
+    def _count_post_processing(post_processing):
+        """
+        Return the smallest size among the 'head'/'tail' actions.
+
+        Parameters
+        ----------
+        post_processing : list
+            Post-processing actions; only those whose first element is 'head'
+            or 'tail' are considered, and their size is read from ``action[1][1]``.
+
+        Returns
+        -------
+        The smallest such size, or None when there is no 'head'/'tail' action.
+        """
+        limits = [action[1][1]
+                  for action in post_processing
+                  if action[0] == 'head' or action[0] == 'tail']
+
+        return min(limits, default=None)
+
+    @staticmethod
+    def _apply_df_post_processing(df, post_processing):
+        """
+        Apply the post-processing actions to ``df`` in order.
+
+        Parameters
+        ----------
+        df
+            Frame to process; must provide ``sort_index``, ``head`` and ``tail``.
+        post_processing : list
+            Actions to apply: the string 'sort_index', or a ('head'/'tail',
+            (sort_field, n)) task. Any other action is ignored.
+
+        Returns
+        -------
+        The frame after all actions have been applied (``df`` itself when
+        there is nothing to apply).
+        """
+        for action in post_processing:
+            if action == 'sort_index':
+                df = df.sort_index()
+            elif action[0] == 'head':
+                # action[1] is (sort_field, n); only n is needed here
+                df = df.head(action[1][1])
+            elif action[0] == 'tail':
+                df = df.tail(action[1][1])
+
+        return df
+
+    def _to_es_query(self):
+        """
+        Fold the recorded tasks into query settings plus post-processing actions.
+
+        Returns
+        -------
+        query : dict
+            Dict with the keys 'query_sort_field', 'query_sort_order' and
+            'query_size'; each stays None unless a task sets it.
+        post_processing : list
+            Actions that must be applied to the results after the query.
+        """
+        # Some tasks merge straight into the query settings; others have to be
+        # applied in-core to the results and end up in the post-processing list.
+        es_query = {"query_sort_field": None,
+                    "query_sort_order": None,
+                    "query_size": None}
+        pending_actions = []
+
+        for task in self._tasks:
+            kind = task[0]
+            if kind == 'head':
+                resolve = self._resolve_head
+            elif kind == 'tail':
+                resolve = self._resolve_tail
+            else:
+                # only head and tail tasks are resolved
+                continue
+            es_query, pending_actions = resolve(task, es_query, pending_actions)
+
+        return es_query, pending_actions
+
+    def _resolve_head(self, item, query, post_processing):
+        """
+        Merge one 'head' task into the query settings or the post-processing list.
+
+        Parameters
+        ----------
+        item : tuple
+            The task, ``('head', (sort_field, n))``.
+        query : dict
+            Query settings; updated in place.
+        post_processing : list
+            Post-processing actions; updated in place.
+
+        Returns
+        -------
+        The same ``query`` and ``post_processing`` objects.
+        """
+        # head - sort asc, size n
+        # |12345-------------|
+        head_field = item[1][0]
+        head_size = item[1][1]
+
+        # Results are already being post-processed, so take the head of those
+        # (another head is simply appended; an earlier head is not overwritten)
+        if post_processing:
+            post_processing.append(item)
+            return query, post_processing
+
+        # An existing sort field / sort order wins: head of an already sorted
+        # query is the head of that existing order
+        defaults = (('query_sort_field', head_field),
+                    ('query_sort_order', Operations.SortOrder.ASC))
+        for key, default in defaults:
+            if query[key] is None:
+                query[key] = default
+
+        # Keep the smaller of the existing size and this head's size
+        current_size = query['query_size']
+        if current_size is None or head_size < current_size:
+            query['query_size'] = head_size
+
+        return query, post_processing
+
+    def _resolve_tail(self, item, query, post_processing):
+        """
+        Merge one 'tail' task into the query settings or the post-processing list.
+
+        The cases are checked in this order:
+        1. tail of a tail (size set, order already DESC, only 'sort_index'
+           pending): shrink the query size if this tail is smaller.
+        2. results are already post-processed: append this task to the
+           post-processing list.
+        3. the query already has a size: append this task to the
+           post-processing list.
+        4. otherwise: set size, sort field and sort order on the query and
+           append 'sort_index' so results are re-ordered after the query.
+
+        Parameters
+        ----------
+        item : tuple
+            The task, ``('tail', (sort_field, n))``.
+        query : dict
+            Query settings; updated in place.
+        post_processing : list
+            Post-processing actions; updated in place.
+
+        Returns
+        -------
+        The same ``query`` and ``post_processing`` objects.
+        """
+        # tail - sort desc, size n, post-process sort asc
+        # |-------------12345|
+        query_sort_field = item[1][0]
+        query_sort_order = Operations.SortOrder.DESC
+        query_size = item[1][1]
+
+        # If this is a tail of a tail adjust settings and return
+        # (note: ('sort_index') has no trailing comma, so it is the plain
+        # string 'sort_index', not a 1-tuple)
+        if query['query_size'] is not None and \
+                query['query_sort_order'] == query_sort_order and \
+                post_processing == [('sort_index')]:
+            if query_size < query['query_size']:
+                query['query_size'] = query_size
+            return query, post_processing
+
+        # If we are already postprocessing the query results, just get 'tail' of these
+        # (note, currently we just append another tail, we don't optimise by
+        # overwriting previous tail)
+        if len(post_processing) > 0:
+            post_processing.append(item)
+            return query, post_processing
+
+        # If results are already constrained, just get 'tail' of these
+        # (note, currently we just append another tail, we don't optimise by
+        # overwriting previous tail)
+        if query['query_size'] is not None:
+            post_processing.append(item)
+            return query, post_processing
+        else:
+            query['query_size'] = query_size
+        if query['query_sort_field'] is None:
+            query['query_sort_field'] = query_sort_field
+        if query['query_sort_order'] is None:
+            query['query_sort_order'] = query_sort_order
+        else:
+            # reverse sort order
+            # NOTE(review): this reverses the local constant (DESC), so the
+            # result is always ASC regardless of the order already stored in
+            # query - confirm whether query['query_sort_order'] was intended.
+            query['query_sort_order'] = Operations.SortOrder.reverse(query_sort_order)
+
+        # Appends the plain string 'sort_index' (see note above)
+        post_processing.append(('sort_index'))
+
+        return query, post_processing
diff --git a/eland/query_compiler.py b/eland/query_compiler.py
new file mode 100644
index 0000000..49b0125
--- /dev/null
+++ b/eland/query_compiler.py
@@ -0,0 +1,247 @@
+import pandas as pd
+from modin.backends.base.query_compiler import BaseQueryCompiler
+
+from eland import Client
+from eland import Index
+from eland import Mappings
+from eland import Operations
+
+
+class ElandQueryCompiler(BaseQueryCompiler):
+
+ def __init__(self, client=None, index_pattern=None, columns=None,
+              index_field=None, operations=None):
+     """
+     Parameters
+     ----------
+     client: wrapped in an eland ``Client``
+     index_pattern: passed to ``Mappings`` together with the client
+     columns: accepted but not used by this constructor
+     index_field: passed to ``Index``
+     operations: existing ``Operations`` to adopt; a new one is created if None
+     """
+     self._client = Client(client)
+     self._index_pattern = index_pattern
+
+     # Load the mappings and keep them: they are what lets us map the types
+     # returned from Elasticsearch to pandas datatypes
+     self._mappings = Mappings(client=self._client, index_pattern=self._index_pattern)
+     self._index = Index(self, index_field)
+     self._operations = Operations() if operations is None else operations
+
+ def _get_index(self):
+ return self._index  # the Index object created in __init__
+
+ def _get_columns(self):
+ return pd.Index(self._mappings.source_fields())  # rebuilt from the mappings on every access
+
+ columns = property(_get_columns)  # getter only: no setter is supplied
+ index = property(_get_index)  # getter only: no setter is supplied
+
+ # END Index, columns, and dtypes objects
+
+ def _es_results_to_pandas(self, results):
+     """
+     Parameters
+     ----------
+     results: dict or iterable of hits
+         Elasticsearch search response (hits are read from results['hits']['hits']) or the hits themselves
+
+     Returns
+     -------
+     df: pandas.DataFrame
+         _source values extracted from results and mapped to pandas DataFrame
+         dtypes are mapped via the Mappings object
+
+     Notes
+     -----
+     Fields containing lists in Elasticsearch don't map easily to pandas.DataFrame
+     For example, an index with mapping:
+     ```
+     "mappings" : {
+       "properties" : {
+         "group" : {
+           "type" : "keyword"
+         },
+         "user" : {
+           "type" : "nested",
+           "properties" : {
+             "first" : {
+               "type" : "keyword"
+             },
+             "last" : {
+               "type" : "keyword"
+             }
+           }
+         }
+       }
+     }
+     ```
+     Adding a document:
+     ```
+     "_source" : {
+       "group" : "amsterdam",
+       "user" : [
+         {
+           "first" : "John",
+           "last" : "Smith"
+         },
+         {
+           "first" : "Alice",
+           "last" : "White"
+         }
+       ]
+     }
+     ```
+     (https://www.elastic.co/guide/en/elasticsearch/reference/current/nested.html)
+     this would be transformed internally (in Elasticsearch) into a document that looks more like this:
+     ```
+     {
+       "group" : "amsterdam",
+       "user.first" : [ "alice", "john" ],
+       "user.last" : [ "smith", "white" ]
+     }
+     ```
+     When mapping this to a pandas DataFrame we mimic this transformation.
+
+     Similarly, if a list is added to Elasticsearch:
+     ```
+     PUT my_index/_doc/1
+     {
+       "list" : [
+         0, 1, 2
+       ]
+     }
+     ```
+     The mapping is:
+     ```
+     "mappings" : {
+       "properties" : {
+         "list" : {
+           "type" : "long"
+         }
+       }
+     }
+     ```
+     TODO - explain how lists are handled (https://www.elastic.co/guide/en/elasticsearch/reference/current/array.html)
+     TODO - an option here is to use Elasticsearch's multi-field matching instead of pandas treatment of lists (which isn't great)
+     NOTE - using lists like this is generally not a good way to use this API
+     """
+
+     def flatten_dict(y):
+         out = {}
+
+         def flatten(x, name=''):
+             # We flatten into source fields e.g. if type=geo_point
+             # location: {lat=52.38, lon=4.90}
+             if name == '':
+                 is_source_field = False
+                 pd_dtype = 'object'
+             else:
+                 is_source_field, pd_dtype = self._mappings.source_field_pd_dtype(name[:-1])
+
+             if not is_source_field and isinstance(x, dict):
+                 for a in x:
+                     flatten(x[a], name + a + '.')
+             elif not is_source_field and isinstance(x, list):
+                 for a in x:
+                     flatten(a, name)
+             elif is_source_field:  # only keep source fields from mappings (TODO - not so efficient for large number of fields and filtered mapping)
+                 field_name = name[:-1]
+
+                 # Coerce types - for now just datetime
+                 if pd_dtype == 'datetime64[ns]':
+                     x = pd.to_datetime(x)
+
+                 # Elasticsearch can have multiple values for a field. These are represented as lists, so
+                 # create lists for this pivot (see notes above)
+                 if field_name in out:
+                     if not isinstance(out[field_name], list):
+                         # second value seen for this field: promote the stored scalar to a list
+                         out[field_name] = [out[field_name]]
+                     out[field_name].append(x)
+                 else:
+                     out[field_name] = x
+
+         flatten(y)
+
+         return out
+
+     rows = []
+     index = []
+     if isinstance(results, dict):
+         iterator = results['hits']['hits']
+     else:
+         iterator = results
+
+     for hit in iterator:
+         row = hit['_source']
+
+         # get index value - can be _id or can be field value in source
+         if self._index.is_source_field:
+             index_field = row[self._index.index_field]
+         else:
+             index_field = hit[self._index.index_field]
+         index.append(index_field)
+
+         # flatten row to map correctly to 2D DataFrame
+         rows.append(flatten_dict(row))
+
+     # Create pandas DataFrame
+     df = pd.DataFrame(data=rows, index=index)
+
+     # _source may not contain all columns in the mapping, so add the missing
+     # ones as all-missing columns created with the mapped pandas dtype
+     columns = self.columns
+     missing_columns = [column for column in columns if column not in df.columns]
+
+     for missing in missing_columns:
+         _, pd_dtype = self._mappings.source_field_pd_dtype(missing)
+         # Series.astype() is not in-place; assign an empty typed Series, which aligns to df's index
+         df[missing] = pd.Series(dtype=pd_dtype)
+
+     # Sort columns in mapping order
+     df = df[columns]
+
+     return df
+
+ def _index_count(self):
+ """
+ Returns
+ -------
+ index_count: int
+ Count of docs where index_field exists (the value returned by Operations.to_count for this compiler)
+ """
+ return self._operations.to_count(self)  # counting is delegated to the Operations object
+
+ def copy(self):
+     """Build a new compiler from this one's settings and a copy of its operations."""
+     constructor = self.__constructor__
+     return constructor(client=self._client,
+                        index_pattern=self._index_pattern,
+                        columns=self.columns,
+                        index_field=self._index.index_field,
+                        operations=self._operations.copy())
+
+ def head(self, n):
+     """Return a copy of this compiler with ``head(n)`` applied to the copy's operations."""
+     compiler = self.copy()
+     # note: the index handed over is this compiler's, not the copy's
+     compiler._operations.head(self._index, n)
+     return compiler
+
+ def tail(self, n):
+     """Return a copy of this compiler with ``tail(n)`` applied to the copy's operations."""
+     compiler = self.copy()
+     # note: the index handed over is this compiler's, not the copy's
+     compiler._operations.tail(self._index, n)
+     return compiler
+
+ # To/From Pandas
+ def to_pandas(self):
+ """Converts Eland DataFrame to Pandas DataFrame (delegates to Operations.to_pandas).
+
+ Returns:
+ Pandas DataFrame
+ """
+ return self._operations.to_pandas(self)
diff --git a/eland/series.py b/eland/series.py
index 47473ed..e69de29 100644
--- a/eland/series.py
+++ b/eland/series.py
@@ -1,402 +0,0 @@
-"""
-Series
----------
-One-dimensional ndarray with axis labels (including time series).
-
-The underlying data resides in Elasticsearch and the API aligns as much as
-possible with pandas.DataFrame API.
-
-This allows the eland.Series to access large datasets stored in Elasticsearch,
-without storing the dataset in local memory.
-
-Implementation Details
-----------------------
-Based on NDFrame which underpins eland.1DataFrame
-
-"""
-import sys
-
-import pandas as pd
-import pandas.compat as compat
-from pandas.compat import StringIO
-from pandas.core.dtypes.common import (
- is_categorical_dtype)
-from pandas.io.formats import format as fmt
-from pandas.io.formats.printing import pprint_thing
-
-from eland import Index
-from eland import NDFrame
-
-
-class Series(NDFrame):
- """
- pandas.Series like API that proxies into Elasticsearch index(es).
-
- Parameters
- ----------
- client : eland.Client
- A reference to a Elasticsearch python client
-
- index_pattern : str
- An Elasticsearch index pattern. This can contain wildcards (e.g. filebeat-*).
-
- field_name : str
- The field to base the series on
-
- See Also
- --------
-
- Examples
- --------
-
- import eland as ed
- client = ed.Client(Elasticsearch())
- s = ed.DataFrame(client, 'reviews', 'date')
- df.head()
- reviewerId vendorId rating date
- 0 0 0 5 2006-04-07 17:08
- 1 1 1 5 2006-05-04 12:16
- 2 2 2 4 2006-04-21 12:26
- 3 3 3 5 2006-04-18 15:48
- 4 3 4 5 2006-04-18 15:49
-
- Notice that the types are based on Elasticsearch mappings
-
- Notes
- -----
- If the Elasticsearch index is deleted or index mappings are changed after this
- object is created, the object is not rebuilt and so inconsistencies can occur.
-
- """
-
- def __init__(self,
- client,
- index_pattern,
- field_name=None,
- mappings=None,
- index_field=None):
- # python 3 syntax
- super().__init__(client, index_pattern, mappings=mappings, index_field=index_field)
-
- # now select column (field_name)
- if field_name is not None:
- self._mappings = self._filter_mappings([field_name])
- elif len(self._mappings.source_fields()) != 1:
- raise TypeError('Series must have 1 field: [{0}]'.format(len(self._mappings.source_fields())))
-
- def head(self, n=5):
- return self._df_to_series(super()._head(n))
-
- def tail(self, n=5):
- return self._df_to_series(super()._tail(n))
-
- def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None,
- null_counts=None):
- """
- Print a concise summary of a DataFrame.
-
- This method prints information about a DataFrame including
- the index dtype and column dtypes, non-null values and memory usage.
-
- This copies a lot of code from pandas.DataFrame.info as it is difficult
- to split out the appropriate code or creating a SparseDataFrame gives
- incorrect results on types and counts.
- """
- if buf is None: # pragma: no cover
- buf = sys.stdout
-
- lines = []
-
- lines.append(str(type(self)))
- lines.append(self._index_summary())
-
- if len(self.columns) == 0:
- lines.append('Empty {name}'.format(name=type(self).__name__))
- fmt.buffer_put_lines(buf, lines)
- return
-
- cols = self.columns
-
- # hack
- if max_cols is None:
- max_cols = pd.get_option('display.max_info_columns',
- len(self.columns) + 1)
-
- max_rows = pd.get_option('display.max_info_rows', len(self) + 1)
-
- if null_counts is None:
- show_counts = ((len(self.columns) <= max_cols) and
- (len(self) < max_rows))
- else:
- show_counts = null_counts
- exceeds_info_cols = len(self.columns) > max_cols
-
- def _verbose_repr():
- lines.append('Data columns (total %d columns):' %
- len(self.columns))
- space = max(len(pprint_thing(k)) for k in self.columns) + 4
- counts = None
-
- tmpl = "{count}{dtype}"
- if show_counts:
- counts = self.count()
- if len(cols) != len(counts): # pragma: no cover
- raise AssertionError(
- 'Columns must equal counts '
- '({cols:d} != {counts:d})'.format(
- cols=len(cols), counts=len(counts)))
- tmpl = "{count} non-null {dtype}"
-
- dtypes = self.dtypes
- for i, col in enumerate(self._columns):
- dtype = dtypes.iloc[i]
- col = pprint_thing(col)
-
- count = ""
- if show_counts:
- count = counts.iloc[i]
-
- lines.append(_put_str(col, space) + tmpl.format(count=count,
- dtype=dtype))
-
- def _non_verbose_repr():
- lines.append(self._columns._summary(name='Columns'))
-
- def _sizeof_fmt(num, size_qualifier):
- # returns size in human readable format
- for x in ['bytes', 'KB', 'MB', 'GB', 'TB']:
- if num < 1024.0:
- return ("{num:3.1f}{size_q} "
- "{x}".format(num=num, size_q=size_qualifier, x=x))
- num /= 1024.0
- return "{num:3.1f}{size_q} {pb}".format(num=num,
- size_q=size_qualifier,
- pb='PB')
-
- if verbose:
- _verbose_repr()
- elif verbose is False: # specifically set to False, not nesc None
- _non_verbose_repr()
- else:
- if exceeds_info_cols:
- _non_verbose_repr()
- else:
- _verbose_repr()
-
- counts = self.get_dtype_counts()
- dtypes = ['{k}({kk:d})'.format(k=k[0], kk=k[1]) for k
- in sorted(counts.items())]
- lines.append('dtypes: {types}'.format(types=', '.join(dtypes)))
-
- if memory_usage is None:
- memory_usage = pd.get_option('display.memory_usage')
- if memory_usage:
- # append memory usage of df to display
- size_qualifier = ''
-
- # TODO - this is different from pd.DataFrame as we shouldn't
- # really hold much in memory. For now just approximate with getsizeof + ignore deep
- mem_usage = sys.getsizeof(self)
- lines.append("memory usage: {mem}\n".format(
- mem=_sizeof_fmt(mem_usage, size_qualifier)))
-
- fmt.buffer_put_lines(buf, lines)
-
- @property
- def name(self):
- return list(self._mappings.source_fields())[0]
-
- @property
- def shape(self):
- """
- Return a tuple representing the dimensionality of the DataFrame.
-
- Returns
- -------
- shape: tuple
- 0 - number of rows
- 1 - number of columns
- """
- num_rows = len(self)
- num_columns = len(self._columns)
-
- return num_rows, num_columns
-
- @property
- def set_index(self, index_field):
- copy = self.copy()
- copy._index = Index(index_field)
- return copy
-
- def _index_summary(self):
- head = self.head(1).index[0]
- tail = self.tail(1).index[0]
- index_summary = ', %s to %s' % (pprint_thing(head),
- pprint_thing(tail))
-
- name = "Index"
- return '%s: %s entries%s' % (name, len(self), index_summary)
-
- def count(self):
- """
- Count non-NA cells for each column (TODO row)
-
- Counts are based on exists queries against ES
-
- This is inefficient, as it creates N queries (N is number of fields).
-
- An alternative approach is to use value_count aggregations. However, they have issues in that:
- 1. They can only be used with aggregatable fields (e.g. keyword not text)
- 2. For list fields they return multiple counts. E.g. tags=['elastic', 'ml'] returns value_count=2
- for a single document.
- """
- counts = {}
- for field in self._mappings.source_fields():
- exists_query = {"query": {"exists": {"field": field}}}
- field_exists_count = self._client.count(index=self._index_pattern, body=exists_query)
- counts[field] = field_exists_count
-
- count = pd.Series(data=counts, index=self._mappings.source_fields())
-
- return count
-
- def describe(self):
- return super()._describe()
-
- def _df_to_series(self, df):
- return df[self.name]
-
- # ----------------------------------------------------------------------
- # Rendering Methods
- def __repr__(self):
- """
- From pandas
- """
- buf = StringIO()
-
- max_rows = pd.get_option("display.max_rows")
-
- self.to_string(buf=buf, na_rep='NaN', float_format=None, header=True, index=True, length=True,
- dtype=True, name=True, max_rows=max_rows)
-
- return buf.getvalue()
-
- def to_string(self, buf=None, na_rep='NaN',
- float_format=None, header=True,
- index=True, length=True, dtype=True,
- name=True, max_rows=None):
- """
- From pandas 0.24.2
-
- Render a string representation of the Series.
-
- Parameters
- ----------
- buf : StringIO-like, optional
- buffer to write to
- na_rep : string, optional
- string representation of NAN to use, default 'NaN'
- float_format : one-parameter function, optional
- formatter function to apply to columns' elements if they are floats
- default None
- header : boolean, default True
- Add the Series header (index name)
- index : bool, optional
- Add index (row) labels, default True
- length : boolean, default False
- Add the Series length
- dtype : boolean, default False
- Add the Series dtype
- name : boolean, default False
- Add the Series name if not None
- max_rows : int, optional
- Maximum number of rows to show before truncating. If None, show
- all.
-
- Returns
- -------
- formatted : string (if not buffer passed)
- """
- if max_rows == None:
- max_rows = pd.get_option("display.max_rows")
-
- df = self._fake_head_tail_df(max_rows=max_rows + 1)
-
- s = self._df_to_series(df)
-
- formatter = Series.SeriesFormatter(s, len(self), name=name, length=length,
- header=header, index=index,
- dtype=dtype, na_rep=na_rep,
- float_format=float_format,
- max_rows=max_rows)
- result = formatter.to_string()
-
- # catch contract violations
- if not isinstance(result, compat.text_type):
- raise AssertionError("result must be of type unicode, type"
- " of result is {0!r}"
- "".format(result.__class__.__name__))
-
- if buf is None:
- return result
- else:
- try:
- buf.write(result)
- except AttributeError:
- with open(buf, 'w') as f:
- f.write(result)
-
- class SeriesFormatter(fmt.SeriesFormatter):
- """
- A hacked overridden version of pandas.io.formats.SeriesFormatter that writes correct length
- """
- def __init__(self, series, series_length, buf=None, length=True, header=True, index=True,
- na_rep='NaN', name=False, float_format=None, dtype=True,
- max_rows=None):
- super().__init__(series, buf=buf, length=length, header=header, index=index,
- na_rep=na_rep, name=name, float_format=float_format, dtype=dtype,
- max_rows=max_rows)
- self._series_length = series_length
-
- def _get_footer(self):
- """
- Overridden with length change
- (from pandas 0.24.2 io.formats.SeriesFormatter)
- """
- name = self.series.name
- footer = ''
-
- if getattr(self.series.index, 'freq', None) is not None:
- footer += 'Freq: {freq}'.format(freq=self.series.index.freqstr)
-
- if self.name is not False and name is not None:
- if footer:
- footer += ', '
-
- series_name = pprint_thing(name,
- escape_chars=('\t', '\r', '\n'))
- footer += ("Name: {sname}".format(sname=series_name)
- if name is not None else "")
-
- if (self.length is True or
- (self.length == 'truncate' and self.truncate_v)):
- if footer:
- footer += ', '
- footer += 'Length: {length}'.format(length=self._series_length)
-
- if self.dtype is not False and self.dtype is not None:
- name = getattr(self.tr_series.dtype, 'name', None)
- if name:
- if footer:
- footer += ', '
- footer += 'dtype: {typ}'.format(typ=pprint_thing(name))
-
- # level infos are added to the end and in a new line, like it is done
- # for Categoricals
- if is_categorical_dtype(self.tr_series.dtype):
- level_info = self.tr_series._values._repr_categories_info()
- if footer:
- footer += "\n"
- footer += level_info
-
- return compat.text_type(footer)
diff --git a/eland/tests/__init__.py b/eland/tests/__init__.py
deleted file mode 100644
index c98fe4f..0000000
--- a/eland/tests/__init__.py
+++ /dev/null
@@ -1,486 +0,0 @@
-import os
-import pandas as pd
-
-ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
-
-# Define test files and indices
-ELASTICSEARCH_HOST = 'localhost' # TODO externalise this
-
-FLIGHTS_INDEX_NAME = 'flights'
-FLIGHTS_MAPPING = { "mappings" : {
- "properties" : {
- "AvgTicketPrice" : {
- "type" : "float"
- },
- "Cancelled" : {
- "type" : "boolean"
- },
- "Carrier" : {
- "type" : "keyword"
- },
- "Dest" : {
- "type" : "keyword"
- },
- "DestAirportID" : {
- "type" : "keyword"
- },
- "DestCityName" : {
- "type" : "keyword"
- },
- "DestCountry" : {
- "type" : "keyword"
- },
- "DestLocation" : {
- "type" : "geo_point"
- },
- "DestRegion" : {
- "type" : "keyword"
- },
- "DestWeather" : {
- "type" : "keyword"
- },
- "DistanceKilometers" : {
- "type" : "float"
- },
- "DistanceMiles" : {
- "type" : "float"
- },
- "FlightDelay" : {
- "type" : "boolean"
- },
- "FlightDelayMin" : {
- "type" : "integer"
- },
- "FlightDelayType" : {
- "type" : "keyword"
- },
- "FlightNum" : {
- "type" : "keyword"
- },
- "FlightTimeHour" : {
- "type" : "float"
- },
- "FlightTimeMin" : {
- "type" : "float"
- },
- "Origin" : {
- "type" : "keyword"
- },
- "OriginAirportID" : {
- "type" : "keyword"
- },
- "OriginCityName" : {
- "type" : "keyword"
- },
- "OriginCountry" : {
- "type" : "keyword"
- },
- "OriginLocation" : {
- "type" : "geo_point"
- },
- "OriginRegion" : {
- "type" : "keyword"
- },
- "OriginWeather" : {
- "type" : "keyword"
- },
- "dayOfWeek" : {
- "type" : "integer"
- },
- "timestamp" : {
- "type" : "date"
- }
- }
- } }
-FLIGHTS_FILE_NAME = ROOT_DIR + '/flights.json.gz'
-FLIGHTS_DF_FILE_NAME = ROOT_DIR + '/flights_df.json.gz'
-
-ECOMMERCE_INDEX_NAME = 'ecommerce'
-ECOMMERCE_MAPPING = { "mappings" : {
- "properties" : {
- "category" : {
- "type" : "text",
- "fields" : {
- "keyword" : {
- "type" : "keyword"
- }
- }
- },
- "currency" : {
- "type" : "keyword"
- },
- "customer_birth_date" : {
- "type" : "date"
- },
- "customer_first_name" : {
- "type" : "text",
- "fields" : {
- "keyword" : {
- "type" : "keyword",
- "ignore_above" : 256
- }
- }
- },
- "customer_full_name" : {
- "type" : "text",
- "fields" : {
- "keyword" : {
- "type" : "keyword",
- "ignore_above" : 256
- }
- }
- },
- "customer_gender" : {
- "type" : "keyword"
- },
- "customer_id" : {
- "type" : "keyword"
- },
- "customer_last_name" : {
- "type" : "text",
- "fields" : {
- "keyword" : {
- "type" : "keyword",
- "ignore_above" : 256
- }
- }
- },
- "customer_phone" : {
- "type" : "keyword"
- },
- "day_of_week" : {
- "type" : "keyword"
- },
- "day_of_week_i" : {
- "type" : "integer"
- },
- "email" : {
- "type" : "keyword"
- },
- "geoip" : {
- "properties" : {
- "city_name" : {
- "type" : "keyword"
- },
- "continent_name" : {
- "type" : "keyword"
- },
- "country_iso_code" : {
- "type" : "keyword"
- },
- "location" : {
- "type" : "geo_point"
- },
- "region_name" : {
- "type" : "keyword"
- }
- }
- },
- "manufacturer" : {
- "type" : "text",
- "fields" : {
- "keyword" : {
- "type" : "keyword"
- }
- }
- },
- "order_date" : {
- "type" : "date"
- },
- "order_id" : {
- "type" : "keyword"
- },
- "products" : {
- "properties" : {
- "_id" : {
- "type" : "text",
- "fields" : {
- "keyword" : {
- "type" : "keyword",
- "ignore_above" : 256
- }
- }
- },
- "base_price" : {
- "type" : "half_float"
- },
- "base_unit_price" : {
- "type" : "half_float"
- },
- "category" : {
- "type" : "text",
- "fields" : {
- "keyword" : {
- "type" : "keyword"
- }
- }
- },
- "created_on" : {
- "type" : "date"
- },
- "discount_amount" : {
- "type" : "half_float"
- },
- "discount_percentage" : {
- "type" : "half_float"
- },
- "manufacturer" : {
- "type" : "text",
- "fields" : {
- "keyword" : {
- "type" : "keyword"
- }
- }
- },
- "min_price" : {
- "type" : "half_float"
- },
- "price" : {
- "type" : "half_float"
- },
- "product_id" : {
- "type" : "long"
- },
- "product_name" : {
- "type" : "text",
- "fields" : {
- "keyword" : {
- "type" : "keyword"
- }
- },
- "analyzer" : "english"
- },
- "quantity" : {
- "type" : "integer"
- },
- "sku" : {
- "type" : "keyword"
- },
- "tax_amount" : {
- "type" : "half_float"
- },
- "taxful_price" : {
- "type" : "half_float"
- },
- "taxless_price" : {
- "type" : "half_float"
- },
- "unit_discount_amount" : {
- "type" : "half_float"
- }
- }
- },
- "sku" : {
- "type" : "keyword"
- },
- "taxful_total_price" : {
- "type" : "half_float"
- },
- "taxless_total_price" : {
- "type" : "half_float"
- },
- "total_quantity" : {
- "type" : "integer"
- },
- "total_unique_products" : {
- "type" : "integer"
- },
- "type" : {
- "type" : "keyword"
- },
- "user" : {
- "type" : "keyword"
- }
- }
- } }
-ECOMMERCE_FILE_NAME = ROOT_DIR + '/ecommerce.json.gz'
-ECOMMERCE_DF_FILE_NAME = ROOT_DIR + '/ecommerce_df.json.gz'
-
-TEST_MAPPING1 = {
- 'mappings': {
- 'properties': {
- 'city': {
- 'type': 'text',
- 'fields': {
- 'raw': {
- 'type': 'keyword'
- }
- }
- },
- 'text': {
- 'type': 'text',
- 'fields': {
- 'english': {
- 'type': 'text',
- 'analyzer': 'english'
- }
- }
- },
- 'origin_location': {
- 'properties': {
- 'lat': {
- 'type': 'text',
- 'index_prefixes': {},
- 'fields': {
- 'keyword': {
- 'type': 'keyword',
- 'ignore_above': 256
- }
- }
- },
- 'lon': {
- 'type': 'text',
- 'fields': {
- 'keyword': {
- 'type': 'keyword',
- 'ignore_above': 256
- }
- }
- }
- }
- },
- 'maps-telemetry': {
- 'properties': {
- 'attributesPerMap': {
- 'properties': {
- 'dataSourcesCount': {
- 'properties': {
- 'avg': {
- 'type': 'long'
- },
- 'max': {
- 'type': 'long'
- },
- 'min': {
- 'type': 'long'
- }
- }
- },
- 'emsVectorLayersCount': {
- 'dynamic': 'true',
- 'properties': {
- 'france_departments': {
- 'properties': {
- 'avg': {
- 'type': 'float'
- },
- 'max': {
- 'type': 'long'
- },
- 'min': {
- 'type': 'long'
- }
- }
- }
- }
- }
- }
- }
- }
- },
- 'type': {
- 'type': 'keyword'
- },
- 'name': {
- 'type': 'text'
- },
- 'user_name': {
- 'type': 'keyword'
- },
- 'email': {
- 'type': 'keyword'
- },
- 'content': {
- 'type': 'text'
- },
- 'tweeted_at': {
- 'type': 'date'
- },
- 'dest_location': {
- 'type': 'geo_point'
- },
- 'my_join_field': {
- 'type': 'join',
- 'relations': {
- 'question': ['answer', 'comment'],
- 'answer': 'vote'
- }
- }
- }
- }
- }
-
-TEST_MAPPING1_INDEX_NAME = 'mapping1'
-
-TEST_MAPPING1_EXPECTED = {
- 'city': 'text',
- 'city.raw': 'keyword',
- 'content': 'text',
- 'dest_location': 'geo_point',
- 'email': 'keyword',
- 'maps-telemetry.attributesPerMap.dataSourcesCount.avg': 'long',
- 'maps-telemetry.attributesPerMap.dataSourcesCount.max': 'long',
- 'maps-telemetry.attributesPerMap.dataSourcesCount.min': 'long',
- 'maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.avg': 'float',
- 'maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.max': 'long',
- 'maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.min': 'long',
- 'my_join_field': 'join',
- 'name': 'text',
- 'origin_location.lat': 'text',
- 'origin_location.lat.keyword': 'keyword',
- 'origin_location.lon': 'text',
- 'origin_location.lon.keyword': 'keyword',
- 'text': 'text',
- 'text.english': 'text',
- 'tweeted_at': 'date',
- 'type': 'keyword',
- 'user_name': 'keyword'
-}
-
-TEST_MAPPING1_EXPECTED_DF = pd.DataFrame.from_dict(data=TEST_MAPPING1_EXPECTED, orient='index', columns=['es_dtype'])
-TEST_MAPPING1_EXPECTED_SOURCE_FIELD_DF = TEST_MAPPING1_EXPECTED_DF.drop(index=['city.raw',
- 'origin_location.lat.keyword',
- 'origin_location.lon.keyword',
- 'text.english'])
-TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT = len(TEST_MAPPING1_EXPECTED_SOURCE_FIELD_DF.index)
-
-TEST_NESTED_USER_GROUP_INDEX_NAME = 'nested_user_group'
-TEST_NESTED_USER_GROUP_MAPPING = {
- 'mappings': {
- 'properties': {
- 'group': {
- 'type': 'keyword'
- },
- 'user': {
- 'properties': {
- 'first': {
- 'type': 'keyword'
- },
- 'last': {
- 'type': 'keyword'
- },
- 'address' : {
- 'type' : 'keyword'
- }
- }
- }
- }
-}
-}
-
-TEST_NESTED_USER_GROUP_DOCS = [
-{'_index':TEST_NESTED_USER_GROUP_INDEX_NAME,
-'_source':
- {'group':'amsterdam','user':[
- {'first':'Manke','last':'Nelis','address':['Elandsgracht', 'Amsterdam']},
- {'first':'Johnny','last':'Jordaan','address':['Elandsstraat', 'Amsterdam']}]}},
-{'_index':TEST_NESTED_USER_GROUP_INDEX_NAME,
-'_source':
- {'group':'london','user':[
- {'first':'Alice','last':'Monkton'},
- {'first':'Jimmy','last':'White','address':['London']}]}},
-{'_index':TEST_NESTED_USER_GROUP_INDEX_NAME,
-'_source':{'group':'new york','user':[
- {'first':'Bill','last':'Jones'}]}}
-]
-
diff --git a/eland/tests/client/__init__.py b/eland/tests/client/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/eland/tests/client/test_mappings_pytest.py b/eland/tests/client/test_mappings_pytest.py
deleted file mode 100644
index c19d43f..0000000
--- a/eland/tests/client/test_mappings_pytest.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# File called _pytest for PyCharm compatability
-
-import numpy as np
-from pandas.util.testing import (
- assert_series_equal, assert_frame_equal)
-
-import eland as ed
-from eland.tests import *
-from eland.tests.common import TestData
-
-
-class TestMapping(TestData):
-
- # Requires 'setup_tests.py' to be run prior to this
- def test_fields(self):
- mappings = ed.Mappings(ed.Client(ELASTICSEARCH_HOST), TEST_MAPPING1_INDEX_NAME)
-
- assert TEST_MAPPING1_EXPECTED_DF.index.tolist() == mappings.all_fields()
-
- assert_frame_equal(TEST_MAPPING1_EXPECTED_DF, pd.DataFrame(mappings._mappings_capabilities['es_dtype']))
-
- assert TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT == mappings.count_source_fields()
-
- def test_copy(self):
- mappings = ed.Mappings(ed.Client(ELASTICSEARCH_HOST), TEST_MAPPING1_INDEX_NAME)
-
- assert TEST_MAPPING1_EXPECTED_DF.index.tolist() == mappings.all_fields()
- assert_frame_equal(TEST_MAPPING1_EXPECTED_DF, pd.DataFrame(mappings._mappings_capabilities['es_dtype']))
- assert TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT == mappings.count_source_fields()
-
- # Pick 1 source field
- columns = ['dest_location']
- mappings_copy1 = ed.Mappings(mappings=mappings, columns=columns)
-
- assert columns == mappings_copy1.all_fields()
- assert len(columns) == mappings_copy1.count_source_fields()
-
- # Pick 3 source fields (out of order)
- columns = ['dest_location', 'city', 'user_name']
- mappings_copy2 = ed.Mappings(mappings=mappings, columns=columns)
-
- assert columns == mappings_copy2.all_fields()
- assert len(columns) == mappings_copy2.count_source_fields()
-
- # Check original is still ok
- assert TEST_MAPPING1_EXPECTED_DF.index.tolist() == mappings.all_fields()
- assert_frame_equal(TEST_MAPPING1_EXPECTED_DF, pd.DataFrame(mappings._mappings_capabilities['es_dtype']))
- assert TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT == mappings.count_source_fields()
-
- def test_dtypes(self):
- mappings = ed.Mappings(ed.Client(ELASTICSEARCH_HOST), TEST_MAPPING1_INDEX_NAME)
-
- expected_dtypes = pd.Series(
- {'city': 'object', 'content': 'object', 'dest_location': 'object', 'email': 'object',
- 'maps-telemetry.attributesPerMap.dataSourcesCount.avg': 'int64',
- 'maps-telemetry.attributesPerMap.dataSourcesCount.max': 'int64',
- 'maps-telemetry.attributesPerMap.dataSourcesCount.min': 'int64',
- 'maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.avg': 'float64',
- 'maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.max': 'int64',
- 'maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.min': 'int64',
- 'my_join_field': 'object', 'name': 'object', 'origin_location.lat': 'object',
- 'origin_location.lon': 'object', 'text': 'object', 'tweeted_at': 'datetime64[ns]',
- 'type': 'object', 'user_name': 'object'})
-
- assert_series_equal(expected_dtypes, mappings.dtypes())
-
- def test_get_dtype_counts(self):
- mappings = ed.Mappings(ed.Client(ELASTICSEARCH_HOST), TEST_MAPPING1_INDEX_NAME)
-
- expected_get_dtype_counts = pd.Series({'datetime64[ns]': 1, 'float64': 1, 'int64': 5, 'object': 11})
-
- assert_series_equal(expected_get_dtype_counts, mappings.get_dtype_counts())
-
- def test_mapping_capabilities(self):
- mappings = ed.Mappings(ed.Client(ELASTICSEARCH_HOST), TEST_MAPPING1_INDEX_NAME)
-
- field_capabilities = mappings.field_capabilities('city')
-
- assert True == field_capabilities['_source']
- assert 'text' == field_capabilities['es_dtype']
- assert 'object' == field_capabilities['pd_dtype']
- assert True == field_capabilities['searchable']
- assert False == field_capabilities['aggregatable']
-
- field_capabilities = mappings.field_capabilities('city.raw')
-
- assert False == field_capabilities['_source']
- assert 'keyword' == field_capabilities['es_dtype']
- assert 'object' == field_capabilities['pd_dtype']
- assert True == field_capabilities['searchable']
- assert True == field_capabilities['aggregatable']
-
- def test_generate_es_mappings(self):
- df = pd.DataFrame(data={'A': np.random.rand(3),
- 'B': 1,
- 'C': 'foo',
- 'D': pd.Timestamp('20190102'),
- 'E': [1.0, 2.0, 3.0],
- 'F': False,
- 'G': [1, 2, 3]},
- index=['0','1','2'])
-
- expected_mappings = {'mappings': {
- 'properties': {'A': {'type': 'double'},
- 'B': {'type': 'long'},
- 'C': {'type': 'keyword'},
- 'D': {'type': 'date'},
- 'E': {'type': 'double'},
- 'F': {'type': 'boolean'},
- 'G': {'type': 'long'}}}}
-
- mappings = ed.Mappings._generate_es_mappings(df)
-
- assert expected_mappings == mappings
-
- # Now create index
- index_name = 'eland_test_generate_es_mappings'
-
- ed.pandas_to_es(df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)
-
- ed_df = ed.DataFrame(ELASTICSEARCH_HOST, index_name)
- ed_df_head = ed_df.head()
-
- assert_frame_equal(df, ed_df_head)
diff --git a/eland/tests/common.py b/eland/tests/common.py
deleted file mode 100644
index 25d1ad7..0000000
--- a/eland/tests/common.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import pytest
-
-import eland as ed
-
-import pandas as pd
-
-import os
-
-ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
-
-# Create pandas and eland data frames
-from eland.tests import ELASTICSEARCH_HOST
-from eland.tests import FLIGHTS_DF_FILE_NAME, FLIGHTS_INDEX_NAME,\
- ECOMMERCE_DF_FILE_NAME, ECOMMERCE_INDEX_NAME
-
-_pd_flights = pd.read_json(FLIGHTS_DF_FILE_NAME).sort_index()
-_pd_flights['timestamp'] = \
- pd.to_datetime(_pd_flights['timestamp'])
-_pd_flights.index = _pd_flights.index.map(str) # make index 'object' not int
-_ed_flights = ed.read_es(ELASTICSEARCH_HOST, FLIGHTS_INDEX_NAME)
-
-_pd_ecommerce = pd.read_json(ECOMMERCE_DF_FILE_NAME).sort_index()
-_pd_ecommerce['order_date'] = \
- pd.to_datetime(_pd_ecommerce['order_date'])
-_pd_ecommerce['products.created_on'] = \
- _pd_ecommerce['products.created_on'].apply(lambda x: pd.to_datetime(x))
-_pd_ecommerce.insert(2, 'customer_birth_date', None)
-_pd_ecommerce.index = _pd_ecommerce.index.map(str) # make index 'object' not int
-_pd_ecommerce['customer_birth_date'].astype('datetime64')
-_ed_ecommerce = ed.read_es(ELASTICSEARCH_HOST, ECOMMERCE_INDEX_NAME)
-
-class TestData:
-
- def pd_flights(self):
- return _pd_flights
-
- def ed_flights(self):
- return _ed_flights
-
- def pd_ecommerce(self):
- return _pd_ecommerce
-
- def ed_ecommerce(self):
- return _ed_ecommerce
diff --git a/eland/tests/dataframe/test_basics_pytest.py b/eland/tests/dataframe/test_basics_pytest.py
deleted file mode 100644
index dce92e9..0000000
--- a/eland/tests/dataframe/test_basics_pytest.py
+++ /dev/null
@@ -1,159 +0,0 @@
-# File called _pytest for PyCharm compatability
-from eland.tests.common import TestData
-
-import pandas as pd
-import io
-
-from pandas.util.testing import (
- assert_series_equal, assert_frame_equal)
-
-class TestDataFrameBasics(TestData):
-
- def test_mapping(self):
- ed_flights_mappings = pd.DataFrame(self.ed_flights()._mappings._mappings_capabilities
- [self.ed_flights()._mappings._mappings_capabilities._source==True]
- ['pd_dtype'])
- pd_flights_mappings = pd.DataFrame(self.pd_flights().dtypes, columns = ['pd_dtype'])
-
- assert_frame_equal(pd_flights_mappings, ed_flights_mappings)
-
- # We don't compare ecommerce here as the default dtypes in pandas from read_json
- # don't match the mapping types. This is mainly because the products field is
- # nested and so can be treated as a multi-field in ES, but not in pandas
-
- def test_head(self):
- pd_flights_head = self.pd_flights().head()
- ed_flights_head = self.ed_flights().head()
-
- print(ed_flights_head)
-
- assert_frame_equal(pd_flights_head, ed_flights_head)
-
- pd_ecommerce_head = self.pd_ecommerce().head()
- ed_ecommerce_head = self.ed_ecommerce().head()
-
- assert_frame_equal(pd_ecommerce_head, ed_ecommerce_head)
-
- def test_tail(self):
- pd_flights_tail = self.pd_flights().tail()
- ed_flights_tail = self.ed_flights().tail()
-
- print(ed_flights_tail)
-
- assert_frame_equal(pd_flights_tail, ed_flights_tail)
-
- pd_ecommerce_tail = self.pd_ecommerce().tail()
- ed_ecommerce_tail = self.ed_ecommerce().tail()
-
- assert_frame_equal(pd_ecommerce_tail, ed_ecommerce_tail)
-
- def test_describe(self):
- pd_flights_describe = self.pd_flights().describe()
- ed_flights_describe = self.ed_flights().describe()
-
- print(ed_flights_describe)
-
- # TODO - this fails now as ES aggregations are approximate
- # if ES percentile agg uses
- # "hdr": {
- # "number_of_significant_value_digits": 3
- # }
- # this works
- #assert_almost_equal(pd_flights_describe, ed_flights_describe)
-
- pd_ecommerce_describe = self.pd_ecommerce().describe()
- ed_ecommerce_describe = self.ed_ecommerce().describe()
-
- print(ed_ecommerce_describe)
-
- # We don't compare ecommerce here as the default dtypes in pandas from read_json
- # don't match the mapping types. This is mainly because the products field is
- # nested and so can be treated as a multi-field in ES, but not in pandas
-
- def test_size(self):
- assert self.pd_flights().shape == self.ed_flights().shape
- assert len(self.pd_flights()) == len(self.ed_flights())
-
- def test_to_string(self):
- print(self.ed_flights())
- print(self.ed_flights().to_string())
-
- def test_info(self):
- ed_flights_info_buf = io.StringIO()
- pd_flights_info_buf = io.StringIO()
-
- self.ed_flights().info(buf=ed_flights_info_buf)
- self.pd_flights().info(buf=pd_flights_info_buf)
-
- print(ed_flights_info_buf.getvalue())
-
- ed_flights_info = (ed_flights_info_buf.getvalue().splitlines())
- pd_flights_info = (pd_flights_info_buf.getvalue().splitlines())
-
- flights_diff = set(ed_flights_info).symmetric_difference(set(pd_flights_info))
-
- ed_ecommerce_info_buf = io.StringIO()
- pd_ecommerce_info_buf = io.StringIO()
-
- self.ed_ecommerce().info(buf=ed_ecommerce_info_buf)
- self.pd_ecommerce().info(buf=pd_ecommerce_info_buf)
-
- ed_ecommerce_info = (ed_ecommerce_info_buf.getvalue().splitlines())
- pd_ecommerce_info = (pd_ecommerce_info_buf.getvalue().splitlines())
-
- # We don't compare ecommerce here as the default dtypes in pandas from read_json
- # don't match the mapping types. This is mainly because the products field is
- # nested and so can be treated as a multi-field in ES, but not in pandas
- ecommerce_diff = set(ed_ecommerce_info).symmetric_difference(set(pd_ecommerce_info))
-
-
- def test_count(self):
- pd_flights_count = self.pd_flights().count()
- ed_flights_count = self.ed_flights().count()
-
- assert_series_equal(pd_flights_count, ed_flights_count)
-
- pd_ecommerce_count = self.pd_ecommerce().count()
- ed_ecommerce_count = self.ed_ecommerce().count()
-
- assert_series_equal(pd_ecommerce_count, ed_ecommerce_count)
-
- def test_get_dtype_counts(self):
- pd_flights_get_dtype_counts = self.pd_flights().get_dtype_counts().sort_index()
- ed_flights_get_dtype_counts = self.ed_flights().get_dtype_counts().sort_index()
-
- assert_series_equal(pd_flights_get_dtype_counts, ed_flights_get_dtype_counts)
-
- def test_get_properties(self):
- pd_flights_shape = self.pd_flights().shape
- ed_flights_shape = self.ed_flights().shape
-
- assert pd_flights_shape == ed_flights_shape
-
- pd_flights_columns = self.pd_flights().columns
- ed_flights_columns = self.ed_flights().columns
-
- assert pd_flights_columns.tolist() == ed_flights_columns.tolist()
-
- pd_flights_dtypes = self.pd_flights().dtypes
- ed_flights_dtypes = self.ed_flights().dtypes
-
- assert_series_equal(pd_flights_dtypes, ed_flights_dtypes)
-
- def test_index(self):
- pd_flights = self.pd_flights()
- pd_flights_timestamp = pd_flights.set_index('timestamp')
- pd_flights.info()
- pd_flights_timestamp.info()
- pd_flights.info()
-
- ed_flights = self.ed_flights()
- ed_flights_timestamp = ed_flights.set_index('timestamp')
- ed_flights.info()
- ed_flights_timestamp.info()
- ed_flights.info()
-
- def test_to_pandas(self):
- ed_ecommerce_pd_df = self.ed_ecommerce().to_pandas()
-
- assert_frame_equal(self.pd_ecommerce(), ed_ecommerce_pd_df)
diff --git a/eland/tests/dataframe/test_getitem_pytest.py b/eland/tests/dataframe/test_getitem_pytest.py
deleted file mode 100644
index e9b58ba..0000000
--- a/eland/tests/dataframe/test_getitem_pytest.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# File called _pytest for PyCharm compatability
-from eland.tests.common import TestData
-
-import pandas as pd
-import io
-
-from pandas.util.testing import (
- assert_series_equal, assert_frame_equal)
-
-class TestDataFrameGetItem(TestData):
-
- def test_getitem_basic(self):
- # Test 1 attribute
- pd_carrier = self.pd_flights()['Carrier']
- ed_carrier = self.ed_flights()['Carrier']
-
- # pandas returns a Series here
- assert_series_equal(pd_carrier.head(100), ed_carrier.head(100))
-
- pd_3_items = self.pd_flights()[['Dest','Carrier','FlightDelay']]
- ed_3_items = self.ed_flights()[['Dest','Carrier','FlightDelay']]
-
- assert_frame_equal(pd_3_items.head(100), ed_3_items.head(100))
-
- # Test numerics
- numerics = ['DistanceMiles', 'AvgTicketPrice', 'FlightTimeMin']
- ed_numerics = self.ed_flights()[numerics]
- pd_numerics = self.pd_flights()[numerics]
-
- assert_frame_equal(pd_numerics.head(100), ed_numerics.head(100))
-
- # just test headers
- ed_numerics_describe = ed_numerics.describe()
- assert ed_numerics_describe.columns.tolist() == numerics
-
- def test_getattr_basic(self):
- # Test 1 attribute
- pd_carrier = self.pd_flights().Carrier
- ed_carrier = self.ed_flights().Carrier
-
- assert_series_equal(pd_carrier.head(100), ed_carrier.head(100))
-
- pd_avgticketprice = self.pd_flights().AvgTicketPrice
- ed_avgticketprice = self.ed_flights().AvgTicketPrice
-
- assert_series_equal(pd_avgticketprice.head(100), ed_avgticketprice.head(100))
-
diff --git a/eland/tests/dataframe/test_head_tail_pytest.py b/eland/tests/dataframe/test_head_tail_pytest.py
new file mode 100644
index 0000000..1e3b7de
--- /dev/null
+++ b/eland/tests/dataframe/test_head_tail_pytest.py
@@ -0,0 +1,79 @@
+# File called _pytest for PyCharm compatibility
+import pandas as pd
+import io
+
+import eland as ed
+
+from pandas.util.testing import (
+ assert_series_equal, assert_frame_equal)
+
+class TestDataFrameHeadTail():
+
+ def test_head(self):
+ ed_flights = ed.read_es(es_params='localhost', index_pattern='flights')
+
+ head_10 = ed_flights.head(10)
+ print(head_10._query_compiler._operations._to_es_query())
+
+ head_8 = head_10.head(8)
+ print(head_8._query_compiler._operations._to_es_query())
+
+ head_20 = head_10.head(20)
+ print(head_20._query_compiler._operations._to_es_query())
+
+ def test_tail(self):
+ ed_flights = ed.read_es(es_params='localhost', index_pattern='flights')
+
+ tail_10 = ed_flights.tail(10)
+ print(tail_10._query_compiler._operations._to_es_query())
+ print(tail_10)
+
+ tail_8 = tail_10.tail(8)
+ print(tail_8._query_compiler._operations._to_es_query())
+
+ tail_20 = tail_10.tail(20)
+ print(tail_20._query_compiler._operations._to_es_query())
+
+ def test_head_tail(self):
+ ed_flights = ed.read_es(es_params='localhost', index_pattern='flights')
+
+ head_10 = ed_flights.head(10)
+ print(head_10._query_compiler._operations._to_es_query())
+
+ tail_8 = head_10.tail(8)
+ print(tail_8._query_compiler._operations._to_es_query())
+
+ tail_5 = tail_8.tail(5)
+ print(tail_5._query_compiler._operations._to_es_query())
+
+ head_4 = tail_5.head(4)
+ print(head_4._query_compiler._operations._to_es_query())
+
+ def test_tail_head(self):
+ ed_flights = ed.read_es(es_params='localhost', index_pattern='flights')
+
+ tail_10 = ed_flights.tail(10)
+ print(tail_10._query_compiler._operations._to_es_query())
+
+ head_8 = tail_10.head(8)
+ print(head_8._query_compiler._operations._to_es_query())
+
+ head_5 = head_8.head(5)
+ print(head_5._query_compiler._operations._to_es_query())
+
+ tail_4 = head_5.tail(4)
+ print(tail_4._query_compiler._operations._to_es_query())
+
+ def test_head_tail_print(self):
+ ed_flights = ed.read_es(es_params='localhost', index_pattern='flights')
+
+ tail_100 = ed_flights.tail(100)
+ print(tail_100._query_compiler._operations._to_es_query())
+ print(tail_100)
+
+ head_10 = tail_100.head(10)
+ print(head_10)
+
+ tail_4 = head_10.tail(4)
+ print(tail_4._query_compiler._operations._to_es_query())
+ print(tail_4)
diff --git a/eland/tests/ecommerce.json.gz b/eland/tests/ecommerce.json.gz
deleted file mode 100644
index b1a5dff..0000000
Binary files a/eland/tests/ecommerce.json.gz and /dev/null differ
diff --git a/eland/tests/ecommerce_df.json.gz b/eland/tests/ecommerce_df.json.gz
deleted file mode 100644
index 11f5a98..0000000
Binary files a/eland/tests/ecommerce_df.json.gz and /dev/null differ
diff --git a/eland/tests/flights.json.gz b/eland/tests/flights.json.gz
deleted file mode 100644
index df976e6..0000000
Binary files a/eland/tests/flights.json.gz and /dev/null differ
diff --git a/eland/tests/flights_df.json.gz b/eland/tests/flights_df.json.gz
deleted file mode 100644
index 5aed61e..0000000
Binary files a/eland/tests/flights_df.json.gz and /dev/null differ
diff --git a/eland/tests/series/test_basics_pytest.py b/eland/tests/series/test_basics_pytest.py
deleted file mode 100644
index 861b8bf..0000000
--- a/eland/tests/series/test_basics_pytest.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# File called _pytest for PyCharm compatability
-from eland.tests.common import TestData
-
-import pandas as pd
-import eland as ed
-import io
-
-from eland.tests import ELASTICSEARCH_HOST
-from eland.tests import FLIGHTS_INDEX_NAME
-
-from pandas.util.testing import (
- assert_series_equal, assert_frame_equal)
-
-class TestSeriesBasics(TestData):
-
- def test_head_tail(self):
- pd_s = self.pd_flights()['Carrier']
- ed_s = ed.Series(ELASTICSEARCH_HOST, FLIGHTS_INDEX_NAME, 'Carrier')
-
- pd_s_head = pd_s.head(10)
- ed_s_head = ed_s.head(10)
-
- assert_series_equal(pd_s_head, ed_s_head)
-
- pd_s_tail = pd_s.tail(10)
- ed_s_tail = ed_s.tail(10)
-
- assert_series_equal(pd_s_tail, ed_s_tail)
-
- def test_print(self):
- ed_s = ed.Series(ELASTICSEARCH_HOST, FLIGHTS_INDEX_NAME, 'timestamp')
- print(ed_s.to_string())
diff --git a/eland/tests/setup_tests.py b/eland/tests/setup_tests.py
deleted file mode 100644
index b60fa1e..0000000
--- a/eland/tests/setup_tests.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import pandas as pd
-from elasticsearch import Elasticsearch
-from elasticsearch import helpers
-
-from eland.tests import *
-
-DATA_LIST = [
- (FLIGHTS_FILE_NAME, FLIGHTS_INDEX_NAME, FLIGHTS_MAPPING),
- (ECOMMERCE_FILE_NAME, ECOMMERCE_INDEX_NAME, ECOMMERCE_MAPPING)
-]
-
-def _setup_data(es):
- # Read json file and index records into Elasticsearch
- for data in DATA_LIST:
- json_file_name = data[0]
- index_name = data[1]
- mapping = data[2]
-
- # Delete index
- print("Deleting index:", index_name)
- es.indices.delete(index=index_name, ignore=[400, 404])
- print("Creating index:", index_name)
- es.indices.create(index=index_name, body=mapping)
-
- df = pd.read_json(json_file_name, lines=True)
-
- actions = []
- n = 0
-
- print("Adding", df.shape[0], "items to index:", index_name)
- for index, row in df.iterrows():
- values = row.to_dict()
- # make timestamp datetime 2018-01-01T12:09:35
- #values['timestamp'] = datetime.strptime(values['timestamp'], '%Y-%m-%dT%H:%M:%S')
-
- # Use integer as id field for repeatable results
- action = {'_index': index_name, '_source': values, '_id': str(n)}
-
- actions.append(action)
-
- n = n + 1
-
- if n % 10000 == 0:
- helpers.bulk(es, actions)
- actions = []
-
- helpers.bulk(es, actions)
- actions = []
-
- print("Done", index_name)
-
-def _setup_test_mappings(es):
- # Create a complex mapping containing many Elasticsearch features
- es.indices.delete(index=TEST_MAPPING1_INDEX_NAME, ignore=[400, 404])
- es.indices.create(index=TEST_MAPPING1_INDEX_NAME, body=TEST_MAPPING1)
-
-def _setup_test_nested(es):
- es.indices.delete(index=TEST_NESTED_USER_GROUP_INDEX_NAME, ignore=[400, 404])
- es.indices.create(index=TEST_NESTED_USER_GROUP_INDEX_NAME, body=TEST_NESTED_USER_GROUP_MAPPING)
-
- helpers.bulk(es, TEST_NESTED_USER_GROUP_DOCS)
-
-if __name__ == '__main__':
- # Create connection to Elasticsearch - use defaults
- es = Elasticsearch(ELASTICSEARCH_HOST)
-
- _setup_data(es)
- _setup_test_mappings(es)
- _setup_test_nested(es)
diff --git a/eland/tests/test.ipynb b/eland/tests/test.ipynb
deleted file mode 100644
index 84e0121..0000000
--- a/eland/tests/test.ipynb
+++ /dev/null
@@ -1,5759 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Pandas"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "pycharm": {
- "is_executing": false
- }
- },
- "outputs": [],
- "source": [
- "import pandas as pd"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "pd_df = pd.read_json('flights.json.gz', lines=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " AvgTicketPrice | \n",
- " Cancelled | \n",
- " Carrier | \n",
- " Dest | \n",
- " DestAirportID | \n",
- " DestCityName | \n",
- " DestCountry | \n",
- " DestLocation | \n",
- " DestRegion | \n",
- " DestWeather | \n",
- " ... | \n",
- " FlightTimeMin | \n",
- " Origin | \n",
- " OriginAirportID | \n",
- " OriginCityName | \n",
- " OriginCountry | \n",
- " OriginLocation | \n",
- " OriginRegion | \n",
- " OriginWeather | \n",
- " dayOfWeek | \n",
- " timestamp | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 841.265642 | \n",
- " False | \n",
- " Kibana Airlines | \n",
- " Sydney Kingsford Smith International Airport | \n",
- " SYD | \n",
- " Sydney | \n",
- " AU | \n",
- " {'lat': '-33.94609833', 'lon': '151.177002'} | \n",
- " SE-BD | \n",
- " Rain | \n",
- " ... | \n",
- " 1030.770416 | \n",
- " Frankfurt am Main Airport | \n",
- " FRA | \n",
- " Frankfurt am Main | \n",
- " DE | \n",
- " {'lat': '50.033333', 'lon': '8.570556'} | \n",
- " DE-HE | \n",
- " Sunny | \n",
- " 0 | \n",
- " 2018-01-01 00:00:00 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 882.982662 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Sunny | \n",
- " ... | \n",
- " 464.389481 | \n",
- " Cape Town International Airport | \n",
- " CPT | \n",
- " Cape Town | \n",
- " ZA | \n",
- " {'lat': '-33.96480179', 'lon': '18.60169983'} | \n",
- " SE-BD | \n",
- " Clear | \n",
- " 0 | \n",
- " 2018-01-01 18:27:00 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 190.636904 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Cloudy | \n",
- " ... | \n",
- " 0.000000 | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 17:11:14 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 181.694216 | \n",
- " True | \n",
- " Kibana Airlines | \n",
- " Treviso-Sant'Angelo Airport | \n",
- " TV01 | \n",
- " Treviso | \n",
- " IT | \n",
- " {'lat': '45.648399', 'lon': '12.1944'} | \n",
- " IT-34 | \n",
- " Clear | \n",
- " ... | \n",
- " 222.749059 | \n",
- " Naples International Airport | \n",
- " NA01 | \n",
- " Naples | \n",
- " IT | \n",
- " {'lat': '40.886002', 'lon': '14.2908'} | \n",
- " IT-72 | \n",
- " Thunder & Lightning | \n",
- " 0 | \n",
- " 2018-01-01 10:33:28 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 730.041778 | \n",
- " False | \n",
- " Kibana Airlines | \n",
- " Xi'an Xianyang International Airport | \n",
- " XIY | \n",
- " Xi'an | \n",
- " CN | \n",
- " {'lat': '34.447102', 'lon': '108.751999'} | \n",
- " SE-BD | \n",
- " Clear | \n",
- " ... | \n",
- " 785.779071 | \n",
- " Licenciado Benito Juarez International Airport | \n",
- " AICM | \n",
- " Mexico City | \n",
- " MX | \n",
- " {'lat': '19.4363', 'lon': '-99.072098'} | \n",
- " MX-DIF | \n",
- " Damaging Wind | \n",
- " 0 | \n",
- " 2018-01-01 05:13:00 | \n",
- "
\n",
- " \n",
- "
\n",
- "
5 rows × 27 columns
\n",
- "
"
- ],
- "text/plain": [
- " AvgTicketPrice Cancelled Carrier \\\n",
- "0 841.265642 False Kibana Airlines \n",
- "1 882.982662 False Logstash Airways \n",
- "2 190.636904 False Logstash Airways \n",
- "3 181.694216 True Kibana Airlines \n",
- "4 730.041778 False Kibana Airlines \n",
- "\n",
- " Dest DestAirportID DestCityName \\\n",
- "0 Sydney Kingsford Smith International Airport SYD Sydney \n",
- "1 Venice Marco Polo Airport VE05 Venice \n",
- "2 Venice Marco Polo Airport VE05 Venice \n",
- "3 Treviso-Sant'Angelo Airport TV01 Treviso \n",
- "4 Xi'an Xianyang International Airport XIY Xi'an \n",
- "\n",
- " DestCountry DestLocation DestRegion \\\n",
- "0 AU {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n",
- "1 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
- "2 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
- "3 IT {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n",
- "4 CN {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
- "\n",
- " DestWeather ... FlightTimeMin \\\n",
- "0 Rain ... 1030.770416 \n",
- "1 Sunny ... 464.389481 \n",
- "2 Cloudy ... 0.000000 \n",
- "3 Clear ... 222.749059 \n",
- "4 Clear ... 785.779071 \n",
- "\n",
- " Origin OriginAirportID \\\n",
- "0 Frankfurt am Main Airport FRA \n",
- "1 Cape Town International Airport CPT \n",
- "2 Venice Marco Polo Airport VE05 \n",
- "3 Naples International Airport NA01 \n",
- "4 Licenciado Benito Juarez International Airport AICM \n",
- "\n",
- " OriginCityName OriginCountry \\\n",
- "0 Frankfurt am Main DE \n",
- "1 Cape Town ZA \n",
- "2 Venice IT \n",
- "3 Naples IT \n",
- "4 Mexico City MX \n",
- "\n",
- " OriginLocation OriginRegion \\\n",
- "0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n",
- "1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n",
- "2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
- "3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n",
- "4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
- "\n",
- " OriginWeather dayOfWeek timestamp \n",
- "0 Sunny 0 2018-01-01 00:00:00 \n",
- "1 Clear 0 2018-01-01 18:27:00 \n",
- "2 Rain 0 2018-01-01 17:11:14 \n",
- "3 Thunder & Lightning 0 2018-01-01 10:33:28 \n",
- "4 Damaging Wind 0 2018-01-01 05:13:00 \n",
- "\n",
- "[5 rows x 27 columns]"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "pd_df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " AvgTicketPrice | \n",
- " DistanceKilometers | \n",
- " DistanceMiles | \n",
- " FlightDelayMin | \n",
- " FlightTimeHour | \n",
- " FlightTimeMin | \n",
- " dayOfWeek | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " count | \n",
- " 13059.000000 | \n",
- " 13059.000000 | \n",
- " 13059.000000 | \n",
- " 13059.000000 | \n",
- " 13059.000000 | \n",
- " 13059.000000 | \n",
- " 13059.000000 | \n",
- "
\n",
- " \n",
- " mean | \n",
- " 628.253689 | \n",
- " 7092.142455 | \n",
- " 4406.853013 | \n",
- " 47.335171 | \n",
- " 8.518797 | \n",
- " 511.127842 | \n",
- " 2.835975 | \n",
- "
\n",
- " \n",
- " std | \n",
- " 266.396861 | \n",
- " 4578.438497 | \n",
- " 2844.909787 | \n",
- " 96.746711 | \n",
- " 5.579233 | \n",
- " 334.753952 | \n",
- " 1.939439 | \n",
- "
\n",
- " \n",
- " min | \n",
- " 100.020528 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 25% | \n",
- " 409.893816 | \n",
- " 2459.705673 | \n",
- " 1528.390247 | \n",
- " 0.000000 | \n",
- " 4.205553 | \n",
- " 252.333192 | \n",
- " 1.000000 | \n",
- "
\n",
- " \n",
- " 50% | \n",
- " 640.556668 | \n",
- " 7610.330866 | \n",
- " 4728.840363 | \n",
- " 0.000000 | \n",
- " 8.384086 | \n",
- " 503.045170 | \n",
- " 3.000000 | \n",
- "
\n",
- " \n",
- " 75% | \n",
- " 842.185470 | \n",
- " 9736.637600 | \n",
- " 6050.066114 | \n",
- " 15.000000 | \n",
- " 12.006934 | \n",
- " 720.416036 | \n",
- " 4.000000 | \n",
- "
\n",
- " \n",
- " max | \n",
- " 1199.729053 | \n",
- " 19881.482315 | \n",
- " 12353.780369 | \n",
- " 360.000000 | \n",
- " 31.715034 | \n",
- " 1902.902032 | \n",
- " 6.000000 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin \\\n",
- "count 13059.000000 13059.000000 13059.000000 13059.000000 \n",
- "mean 628.253689 7092.142455 4406.853013 47.335171 \n",
- "std 266.396861 4578.438497 2844.909787 96.746711 \n",
- "min 100.020528 0.000000 0.000000 0.000000 \n",
- "25% 409.893816 2459.705673 1528.390247 0.000000 \n",
- "50% 640.556668 7610.330866 4728.840363 0.000000 \n",
- "75% 842.185470 9736.637600 6050.066114 15.000000 \n",
- "max 1199.729053 19881.482315 12353.780369 360.000000 \n",
- "\n",
- " FlightTimeHour FlightTimeMin dayOfWeek \n",
- "count 13059.000000 13059.000000 13059.000000 \n",
- "mean 8.518797 511.127842 2.835975 \n",
- "std 5.579233 334.753952 1.939439 \n",
- "min 0.000000 0.000000 0.000000 \n",
- "25% 4.205553 252.333192 1.000000 \n",
- "50% 8.384086 503.045170 3.000000 \n",
- "75% 12.006934 720.416036 4.000000 \n",
- "max 31.715034 1902.902032 6.000000 "
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "pd_df.describe()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " AvgTicketPrice | \n",
- " Cancelled | \n",
- " Carrier | \n",
- " Dest | \n",
- " DestAirportID | \n",
- " DestCityName | \n",
- " DestCountry | \n",
- " DestLocation | \n",
- " DestRegion | \n",
- " DestWeather | \n",
- " ... | \n",
- " FlightTimeMin | \n",
- " Origin | \n",
- " OriginAirportID | \n",
- " OriginCityName | \n",
- " OriginCountry | \n",
- " OriginLocation | \n",
- " OriginRegion | \n",
- " OriginWeather | \n",
- " dayOfWeek | \n",
- " timestamp | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 841.265642 | \n",
- " False | \n",
- " Kibana Airlines | \n",
- " Sydney Kingsford Smith International Airport | \n",
- " SYD | \n",
- " Sydney | \n",
- " AU | \n",
- " {'lat': '-33.94609833', 'lon': '151.177002'} | \n",
- " SE-BD | \n",
- " Rain | \n",
- " ... | \n",
- " 1030.770416 | \n",
- " Frankfurt am Main Airport | \n",
- " FRA | \n",
- " Frankfurt am Main | \n",
- " DE | \n",
- " {'lat': '50.033333', 'lon': '8.570556'} | \n",
- " DE-HE | \n",
- " Sunny | \n",
- " 0 | \n",
- " 2018-01-01 00:00:00 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 882.982662 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Sunny | \n",
- " ... | \n",
- " 464.389481 | \n",
- " Cape Town International Airport | \n",
- " CPT | \n",
- " Cape Town | \n",
- " ZA | \n",
- " {'lat': '-33.96480179', 'lon': '18.60169983'} | \n",
- " SE-BD | \n",
- " Clear | \n",
- " 0 | \n",
- " 2018-01-01 18:27:00 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 190.636904 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Cloudy | \n",
- " ... | \n",
- " 0.000000 | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 17:11:14 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 181.694216 | \n",
- " True | \n",
- " Kibana Airlines | \n",
- " Treviso-Sant'Angelo Airport | \n",
- " TV01 | \n",
- " Treviso | \n",
- " IT | \n",
- " {'lat': '45.648399', 'lon': '12.1944'} | \n",
- " IT-34 | \n",
- " Clear | \n",
- " ... | \n",
- " 222.749059 | \n",
- " Naples International Airport | \n",
- " NA01 | \n",
- " Naples | \n",
- " IT | \n",
- " {'lat': '40.886002', 'lon': '14.2908'} | \n",
- " IT-72 | \n",
- " Thunder & Lightning | \n",
- " 0 | \n",
- " 2018-01-01 10:33:28 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 730.041778 | \n",
- " False | \n",
- " Kibana Airlines | \n",
- " Xi'an Xianyang International Airport | \n",
- " XIY | \n",
- " Xi'an | \n",
- " CN | \n",
- " {'lat': '34.447102', 'lon': '108.751999'} | \n",
- " SE-BD | \n",
- " Clear | \n",
- " ... | \n",
- " 785.779071 | \n",
- " Licenciado Benito Juarez International Airport | \n",
- " AICM | \n",
- " Mexico City | \n",
- " MX | \n",
- " {'lat': '19.4363', 'lon': '-99.072098'} | \n",
- " MX-DIF | \n",
- " Damaging Wind | \n",
- " 0 | \n",
- " 2018-01-01 05:13:00 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 418.152089 | \n",
- " False | \n",
- " JetBeats | \n",
- " Genoa Cristoforo Colombo Airport | \n",
- " GE01 | \n",
- " Genova | \n",
- " IT | \n",
- " {'lat': '44.4133', 'lon': '8.8375'} | \n",
- " IT-42 | \n",
- " Thunder & Lightning | \n",
- " ... | \n",
- " 393.590441 | \n",
- " Edmonton International Airport | \n",
- " CYEG | \n",
- " Edmonton | \n",
- " CA | \n",
- " {'lat': '53.30970001', 'lon': '-113.5800018'} | \n",
- " CA-AB | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 01:43:03 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " 180.246816 | \n",
- " False | \n",
- " JetBeats | \n",
- " Zurich Airport | \n",
- " ZRH | \n",
- " Zurich | \n",
- " CH | \n",
- " {'lat': '47.464699', 'lon': '8.54917'} | \n",
- " CH-ZH | \n",
- " Hail | \n",
- " ... | \n",
- " 300.000000 | \n",
- " Zurich Airport | \n",
- " ZRH | \n",
- " Zurich | \n",
- " CH | \n",
- " {'lat': '47.464699', 'lon': '8.54917'} | \n",
- " CH-ZH | \n",
- " Clear | \n",
- " 0 | \n",
- " 2018-01-01 13:49:53 | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " 585.184310 | \n",
- " False | \n",
- " Kibana Airlines | \n",
- " Ottawa Macdonald-Cartier International Airport | \n",
- " YOW | \n",
- " Ottawa | \n",
- " CA | \n",
- " {'lat': '45.32249832', 'lon': '-75.66919708'} | \n",
- " CA-ON | \n",
- " Clear | \n",
- " ... | \n",
- " 614.942480 | \n",
- " Ciampino___G. B. Pastine International Airport | \n",
- " RM12 | \n",
- " Rome | \n",
- " IT | \n",
- " {'lat': '41.7994', 'lon': '12.5949'} | \n",
- " IT-62 | \n",
- " Thunder & Lightning | \n",
- " 0 | \n",
- " 2018-01-01 04:54:59 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " 960.869736 | \n",
- " True | \n",
- " Kibana Airlines | \n",
- " Rajiv Gandhi International Airport | \n",
- " HYD | \n",
- " Hyderabad | \n",
- " IN | \n",
- " {'lat': '17.23131752', 'lon': '78.42985535'} | \n",
- " SE-BD | \n",
- " Cloudy | \n",
- " ... | \n",
- " 602.030591 | \n",
- " Milano Linate Airport | \n",
- " MI11 | \n",
- " Milan | \n",
- " IT | \n",
- " {'lat': '45.445099', 'lon': '9.27674'} | \n",
- " IT-25 | \n",
- " Heavy Fog | \n",
- " 0 | \n",
- " 2018-01-01 12:09:35 | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " 296.877773 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Treviso-Sant'Angelo Airport | \n",
- " TV01 | \n",
- " Treviso | \n",
- " IT | \n",
- " {'lat': '45.648399', 'lon': '12.1944'} | \n",
- " IT-34 | \n",
- " Rain | \n",
- " ... | \n",
- " 174.822216 | \n",
- " Sheremetyevo International Airport | \n",
- " SVO | \n",
- " Moscow | \n",
- " RU | \n",
- " {'lat': '55.972599', 'lon': '37.4146'} | \n",
- " RU-MOS | \n",
- " Cloudy | \n",
- " 0 | \n",
- " 2018-01-01 12:09:35 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " 906.437948 | \n",
- " False | \n",
- " JetBeats | \n",
- " Helsinki Vantaa Airport | \n",
- " HEL | \n",
- " Helsinki | \n",
- " FI | \n",
- " {'lat': '60.31719971', 'lon': '24.9633007'} | \n",
- " FI-ES | \n",
- " Rain | \n",
- " ... | \n",
- " 503.045170 | \n",
- " Albuquerque International Sunport Airport | \n",
- " ABQ | \n",
- " Albuquerque | \n",
- " US | \n",
- " {'lat': '35.040199', 'lon': '-106.609001'} | \n",
- " US-NM | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 22:06:14 | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " 704.463771 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Vienna International Airport | \n",
- " VIE | \n",
- " Vienna | \n",
- " AT | \n",
- " {'lat': '48.11029816', 'lon': '16.56970024'} | \n",
- " AT-9 | \n",
- " Cloudy | \n",
- " ... | \n",
- " 36.075018 | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 11:52:34 | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " 922.499077 | \n",
- " True | \n",
- " Logstash Airways | \n",
- " Shanghai Pudong International Airport | \n",
- " PVG | \n",
- " Shanghai | \n",
- " CN | \n",
- " {'lat': '31.14340019', 'lon': '121.8050003'} | \n",
- " SE-BD | \n",
- " Clear | \n",
- " ... | \n",
- " 679.768391 | \n",
- " Licenciado Benito Juarez International Airport | \n",
- " AICM | \n",
- " Mexico City | \n",
- " MX | \n",
- " {'lat': '19.4363', 'lon': '-99.072098'} | \n",
- " MX-DIF | \n",
- " Heavy Fog | \n",
- " 0 | \n",
- " 2018-01-01 02:13:46 | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " 374.959276 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Ottawa Macdonald-Cartier International Airport | \n",
- " YOW | \n",
- " Ottawa | \n",
- " CA | \n",
- " {'lat': '45.32249832', 'lon': '-75.66919708'} | \n",
- " CA-ON | \n",
- " Rain | \n",
- " ... | \n",
- " 330.418282 | \n",
- " Naples International Airport | \n",
- " NA01 | \n",
- " Naples | \n",
- " IT | \n",
- " {'lat': '40.886002', 'lon': '14.2908'} | \n",
- " IT-72 | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 14:21:13 | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " 552.917371 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Luis Munoz Marin International Airport | \n",
- " SJU | \n",
- " San Juan | \n",
- " PR | \n",
- " {'lat': '18.43939972', 'lon': '-66.00180054'} | \n",
- " PR-U-A | \n",
- " Clear | \n",
- " ... | \n",
- " 407.145031 | \n",
- " Ciampino___G. B. Pastine International Airport | \n",
- " RM12 | \n",
- " Rome | \n",
- " IT | \n",
- " {'lat': '41.7994', 'lon': '12.5949'} | \n",
- " IT-62 | \n",
- " Cloudy | \n",
- " 0 | \n",
- " 2018-01-01 17:42:53 | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " 566.487557 | \n",
- " True | \n",
- " Kibana Airlines | \n",
- " Cologne Bonn Airport | \n",
- " CGN | \n",
- " Cologne | \n",
- " DE | \n",
- " {'lat': '50.86589813', 'lon': '7.142739773'} | \n",
- " DE-NW | \n",
- " Sunny | \n",
- " ... | \n",
- " 656.712658 | \n",
- " Chengdu Shuangliu International Airport | \n",
- " CTU | \n",
- " Chengdu | \n",
- " CN | \n",
- " {'lat': '30.57850075', 'lon': '103.9469986'} | \n",
- " SE-BD | \n",
- " Thunder & Lightning | \n",
- " 0 | \n",
- " 2018-01-01 19:55:32 | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " 989.952787 | \n",
- " True | \n",
- " Logstash Airways | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Damaging Wind | \n",
- " ... | \n",
- " 773.030334 | \n",
- " Licenciado Benito Juarez International Airport | \n",
- " AICM | \n",
- " Mexico City | \n",
- " MX | \n",
- " {'lat': '19.4363', 'lon': '-99.072098'} | \n",
- " MX-DIF | \n",
- " Thunder & Lightning | \n",
- " 0 | \n",
- " 2018-01-01 07:49:27 | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " 569.613255 | \n",
- " False | \n",
- " ES-Air | \n",
- " Ministro Pistarini International Airport | \n",
- " EZE | \n",
- " Buenos Aires | \n",
- " AR | \n",
- " {'lat': '-34.8222', 'lon': '-58.5358'} | \n",
- " SE-BD | \n",
- " Cloudy | \n",
- " ... | \n",
- " 704.716920 | \n",
- " Cleveland Hopkins International Airport | \n",
- " CLE | \n",
- " Cleveland | \n",
- " US | \n",
- " {'lat': '41.4117012', 'lon': '-81.84980011'} | \n",
- " US-OH | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 01:30:47 | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " 277.429707 | \n",
- " False | \n",
- " ES-Air | \n",
- " Shanghai Pudong International Airport | \n",
- " PVG | \n",
- " Shanghai | \n",
- " CN | \n",
- " {'lat': '31.14340019', 'lon': '121.8050003'} | \n",
- " SE-BD | \n",
- " Clear | \n",
- " ... | \n",
- " 355.957996 | \n",
- " Olenya Air Base | \n",
- " XLMO | \n",
- " Olenegorsk | \n",
- " RU | \n",
- " {'lat': '68.15180206', 'lon': '33.46390152'} | \n",
- " RU-MUR | \n",
- " Hail | \n",
- " 0 | \n",
- " 2018-01-01 07:58:17 | \n",
- "
\n",
- " \n",
- " 19 | \n",
- " 772.100846 | \n",
- " False | \n",
- " JetBeats | \n",
- " Indira Gandhi International Airport | \n",
- " DEL | \n",
- " New Delhi | \n",
- " IN | \n",
- " {'lat': '28.5665', 'lon': '77.103104'} | \n",
- " SE-BD | \n",
- " Clear | \n",
- " ... | \n",
- " 875.114675 | \n",
- " Casper-Natrona County International Airport | \n",
- " CPR | \n",
- " Casper | \n",
- " US | \n",
- " {'lat': '42.90800095', 'lon': '-106.4639969'} | \n",
- " US-WY | \n",
- " Cloudy | \n",
- " 0 | \n",
- " 2018-01-01 00:02:06 | \n",
- "
\n",
- " \n",
- " 20 | \n",
- " 167.599922 | \n",
- " False | \n",
- " JetBeats | \n",
- " Wichita Mid Continent Airport | \n",
- " ICT | \n",
- " Wichita | \n",
- " US | \n",
- " {'lat': '37.64989853', 'lon': '-97.43309784'} | \n",
- " US-KS | \n",
- " Clear | \n",
- " ... | \n",
- " 373.966883 | \n",
- " Erie International Tom Ridge Field | \n",
- " ERI | \n",
- " Erie | \n",
- " US | \n",
- " {'lat': '42.08312701', 'lon': '-80.17386675'} | \n",
- " US-PA | \n",
- " Cloudy | \n",
- " 0 | \n",
- " 2018-01-01 01:08:20 | \n",
- "
\n",
- " \n",
- " 21 | \n",
- " 253.210065 | \n",
- " False | \n",
- " ES-Air | \n",
- " Ottawa Macdonald-Cartier International Airport | \n",
- " YOW | \n",
- " Ottawa | \n",
- " CA | \n",
- " {'lat': '45.32249832', 'lon': '-75.66919708'} | \n",
- " CA-ON | \n",
- " Hail | \n",
- " ... | \n",
- " 130.667700 | \n",
- " Newark Liberty International Airport | \n",
- " EWR | \n",
- " Newark | \n",
- " US | \n",
- " {'lat': '40.69250107', 'lon': '-74.16870117'} | \n",
- " US-NJ | \n",
- " Clear | \n",
- " 0 | \n",
- " 2018-01-01 01:08:20 | \n",
- "
\n",
- " \n",
- " 22 | \n",
- " 917.247620 | \n",
- " False | \n",
- " JetBeats | \n",
- " Itami Airport | \n",
- " ITM | \n",
- " Osaka | \n",
- " JP | \n",
- " {'lat': '34.78549957', 'lon': '135.4380035'} | \n",
- " SE-BD | \n",
- " Damaging Wind | \n",
- " ... | \n",
- " 574.495310 | \n",
- " Copenhagen Kastrup Airport | \n",
- " CPH | \n",
- " Copenhagen | \n",
- " DK | \n",
- " {'lat': '55.61790085', 'lon': '12.65600014'} | \n",
- " DK-84 | \n",
- " Sunny | \n",
- " 0 | \n",
- " 2018-01-01 07:48:35 | \n",
- "
\n",
- " \n",
- " 23 | \n",
- " 451.591176 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Vienna International Airport | \n",
- " VIE | \n",
- " Vienna | \n",
- " AT | \n",
- " {'lat': '48.11029816', 'lon': '16.56970024'} | \n",
- " AT-9 | \n",
- " Heavy Fog | \n",
- " ... | \n",
- " 579.728943 | \n",
- " Seattle Tacoma International Airport | \n",
- " SEA | \n",
- " Seattle | \n",
- " US | \n",
- " {'lat': '47.44900131', 'lon': '-122.3089981'} | \n",
- " US-WA | \n",
- " Heavy Fog | \n",
- " 0 | \n",
- " 2018-01-01 18:57:21 | \n",
- "
\n",
- " \n",
- " 24 | \n",
- " 307.067201 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Charles de Gaulle International Airport | \n",
- " CDG | \n",
- " Paris | \n",
- " FR | \n",
- " {'lat': '49.01279831', 'lon': '2.549999952'} | \n",
- " FR-J | \n",
- " Clear | \n",
- " ... | \n",
- " 50.157229 | \n",
- " Berlin-Tegel Airport | \n",
- " TXL | \n",
- " Berlin | \n",
- " DE | \n",
- " {'lat': '52.5597', 'lon': '13.2877'} | \n",
- " DE-BE | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 13:18:25 | \n",
- "
\n",
- " \n",
- " 25 | \n",
- " 268.241596 | \n",
- " False | \n",
- " ES-Air | \n",
- " Narita International Airport | \n",
- " NRT | \n",
- " Tokyo | \n",
- " JP | \n",
- " {'lat': '35.76470184', 'lon': '140.3860016'} | \n",
- " SE-BD | \n",
- " Rain | \n",
- " ... | \n",
- " 527.567422 | \n",
- " Manchester Airport | \n",
- " MAN | \n",
- " Manchester | \n",
- " GB | \n",
- " {'lat': '53.35369873', 'lon': '-2.274950027'} | \n",
- " GB-ENG | \n",
- " Thunder & Lightning | \n",
- " 0 | \n",
- " 2018-01-01 08:20:35 | \n",
- "
\n",
- " \n",
- " 26 | \n",
- " 975.812632 | \n",
- " True | \n",
- " Kibana Airlines | \n",
- " Itami Airport | \n",
- " ITM | \n",
- " Osaka | \n",
- " JP | \n",
- " {'lat': '34.78549957', 'lon': '135.4380035'} | \n",
- " SE-BD | \n",
- " Hail | \n",
- " ... | \n",
- " 386.259764 | \n",
- " Helsinki Vantaa Airport | \n",
- " HEL | \n",
- " Helsinki | \n",
- " FI | \n",
- " {'lat': '60.31719971', 'lon': '24.9633007'} | \n",
- " FI-ES | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 15:38:32 | \n",
- "
\n",
- " \n",
- " 27 | \n",
- " 134.214546 | \n",
- " False | \n",
- " JetBeats | \n",
- " San Diego International Airport | \n",
- " SAN | \n",
- " San Diego | \n",
- " US | \n",
- " {'lat': '32.73360062', 'lon': '-117.1900024'} | \n",
- " US-CA | \n",
- " Clear | \n",
- " ... | \n",
- " 24.479650 | \n",
- " Phoenix Sky Harbor International Airport | \n",
- " PHX | \n",
- " Phoenix | \n",
- " US | \n",
- " {'lat': '33.43429947', 'lon': '-112.012001'} | \n",
- " US-AZ | \n",
- " Clear | \n",
- " 0 | \n",
- " 2018-01-01 03:08:45 | \n",
- "
\n",
- " \n",
- " 28 | \n",
- " 988.897564 | \n",
- " False | \n",
- " Kibana Airlines | \n",
- " Verona Villafranca Airport | \n",
- " VR10 | \n",
- " Verona | \n",
- " IT | \n",
- " {'lat': '45.395699', 'lon': '10.8885'} | \n",
- " IT-34 | \n",
- " Sunny | \n",
- " ... | \n",
- " 568.351033 | \n",
- " New Chitose Airport | \n",
- " CTS | \n",
- " Chitose / Tomakomai | \n",
- " JP | \n",
- " {'lat': '42.77519989', 'lon': '141.6920013'} | \n",
- " SE-BD | \n",
- " Damaging Wind | \n",
- " 0 | \n",
- " 2018-01-01 01:16:59 | \n",
- "
\n",
- " \n",
- " 29 | \n",
- " 511.067220 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Zurich Airport | \n",
- " ZRH | \n",
- " Zurich | \n",
- " CH | \n",
- " {'lat': '47.464699', 'lon': '8.54917'} | \n",
- " CH-ZH | \n",
- " Rain | \n",
- " ... | \n",
- " 425.889194 | \n",
- " Tulsa International Airport | \n",
- " TUL | \n",
- " Tulsa | \n",
- " US | \n",
- " {'lat': '36.19839859', 'lon': '-95.88809967'} | \n",
- " US-OK | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 18:00:59 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 13029 | \n",
- " 795.905278 | \n",
- " False | \n",
- " Kibana Airlines | \n",
- " Malpensa International Airport | \n",
- " MI12 | \n",
- " Milan | \n",
- " IT | \n",
- " {'lat': '45.6306', 'lon': '8.72811'} | \n",
- " IT-25 | \n",
- " Sunny | \n",
- " ... | \n",
- " 534.375826 | \n",
- " Itami Airport | \n",
- " ITM | \n",
- " Osaka | \n",
- " JP | \n",
- " {'lat': '34.78549957', 'lon': '135.4380035'} | \n",
- " SE-BD | \n",
- " Sunny | \n",
- " 6 | \n",
- " 2018-02-11 20:10:13 | \n",
- "
\n",
- " \n",
- " 13030 | \n",
- " 863.388068 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Xi'an Xianyang International Airport | \n",
- " XIY | \n",
- " Xi'an | \n",
- " CN | \n",
- " {'lat': '34.447102', 'lon': '108.751999'} | \n",
- " SE-BD | \n",
- " Damaging Wind | \n",
- " ... | \n",
- " 141.172633 | \n",
- " Tokyo Haneda International Airport | \n",
- " HND | \n",
- " Tokyo | \n",
- " JP | \n",
- " {'lat': '35.552299', 'lon': '139.779999'} | \n",
- " SE-BD | \n",
- " Clear | \n",
- " 6 | \n",
- " 2018-02-11 18:59:53 | \n",
- "
\n",
- " \n",
- " 13031 | \n",
- " 575.183008 | \n",
- " False | \n",
- " JetBeats | \n",
- " Savannah Hilton Head International Airport | \n",
- " SAV | \n",
- " Savannah | \n",
- " US | \n",
- " {'lat': '32.12760162', 'lon': '-81.20210266'} | \n",
- " US-GA | \n",
- " Thunder & Lightning | \n",
- " ... | \n",
- " 1113.137060 | \n",
- " OR Tambo International Airport | \n",
- " JNB | \n",
- " Johannesburg | \n",
- " ZA | \n",
- " {'lat': '-26.1392', 'lon': '28.246'} | \n",
- " SE-BD | \n",
- " Hail | \n",
- " 6 | \n",
- " 2018-02-11 00:57:48 | \n",
- "
\n",
- " \n",
- " 13032 | \n",
- " 817.368952 | \n",
- " False | \n",
- " JetBeats | \n",
- " Syracuse Hancock International Airport | \n",
- " SYR | \n",
- " Syracuse | \n",
- " US | \n",
- " {'lat': '43.11119843', 'lon': '-76.10630035'} | \n",
- " US-NY | \n",
- " Rain | \n",
- " ... | \n",
- " 714.964864 | \n",
- " El Dorado International Airport | \n",
- " BOG | \n",
- " Bogota | \n",
- " CO | \n",
- " {'lat': '4.70159', 'lon': '-74.1469'} | \n",
- " CO-CUN | \n",
- " Thunder & Lightning | \n",
- " 6 | \n",
- " 2018-02-11 12:02:49 | \n",
- "
\n",
- " \n",
- " 13033 | \n",
- " 579.582455 | \n",
- " False | \n",
- " ES-Air | \n",
- " Tampa International Airport | \n",
- " TPA | \n",
- " Tampa | \n",
- " US | \n",
- " {'lat': '27.97550011', 'lon': '-82.53320313'} | \n",
- " US-FL | \n",
- " Rain | \n",
- " ... | \n",
- " 234.929046 | \n",
- " Jorge Chavez International Airport | \n",
- " LIM | \n",
- " Lima | \n",
- " PE | \n",
- " {'lat': '-12.0219', 'lon': '-77.114304'} | \n",
- " SE-BD | \n",
- " Thunder & Lightning | \n",
- " 6 | \n",
- " 2018-02-11 02:07:40 | \n",
- "
\n",
- " \n",
- " 13034 | \n",
- " 1004.916638 | \n",
- " False | \n",
- " JetBeats | \n",
- " Olenya Air Base | \n",
- " XLMO | \n",
- " Olenegorsk | \n",
- " RU | \n",
- " {'lat': '68.15180206', 'lon': '33.46390152'} | \n",
- " RU-MUR | \n",
- " Clear | \n",
- " ... | \n",
- " 526.895776 | \n",
- " Gimpo International Airport | \n",
- " GMP | \n",
- " Seoul | \n",
- " KR | \n",
- " {'lat': '37.5583', 'lon': '126.791'} | \n",
- " SE-BD | \n",
- " Sunny | \n",
- " 6 | \n",
- " 2018-02-11 00:35:04 | \n",
- "
\n",
- " \n",
- " 13035 | \n",
- " 357.562842 | \n",
- " True | \n",
- " Logstash Airways | \n",
- " Shanghai Pudong International Airport | \n",
- " PVG | \n",
- " Shanghai | \n",
- " CN | \n",
- " {'lat': '31.14340019', 'lon': '121.8050003'} | \n",
- " SE-BD | \n",
- " Thunder & Lightning | \n",
- " ... | \n",
- " 0.000000 | \n",
- " Shanghai Pudong International Airport | \n",
- " PVG | \n",
- " Shanghai | \n",
- " CN | \n",
- " {'lat': '31.14340019', 'lon': '121.8050003'} | \n",
- " SE-BD | \n",
- " Thunder & Lightning | \n",
- " 6 | \n",
- " 2018-02-11 11:19:12 | \n",
- "
\n",
- " \n",
- " 13036 | \n",
- " 429.580539 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Sunny | \n",
- " ... | \n",
- " 150.000000 | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Cloudy | \n",
- " 6 | \n",
- " 2018-02-11 15:07:11 | \n",
- "
\n",
- " \n",
- " 13037 | \n",
- " 729.788171 | \n",
- " True | \n",
- " ES-Air | \n",
- " Vienna International Airport | \n",
- " VIE | \n",
- " Vienna | \n",
- " AT | \n",
- " {'lat': '48.11029816', 'lon': '16.56970024'} | \n",
- " AT-9 | \n",
- " Rain | \n",
- " ... | \n",
- " 691.944839 | \n",
- " Ukrainka Air Base | \n",
- " XHBU | \n",
- " Belogorsk | \n",
- " RU | \n",
- " {'lat': '51.169997', 'lon': '128.445007'} | \n",
- " RU-AMU | \n",
- " Damaging Wind | \n",
- " 6 | \n",
- " 2018-02-11 10:24:42 | \n",
- "
\n",
- " \n",
- " 13038 | \n",
- " 564.897695 | \n",
- " False | \n",
- " ES-Air | \n",
- " Pisa International Airport | \n",
- " PI05 | \n",
- " Pisa | \n",
- " IT | \n",
- " {'lat': '43.683899', 'lon': '10.3927'} | \n",
- " IT-52 | \n",
- " Heavy Fog | \n",
- " ... | \n",
- " 567.387339 | \n",
- " OR Tambo International Airport | \n",
- " JNB | \n",
- " Johannesburg | \n",
- " ZA | \n",
- " {'lat': '-26.1392', 'lon': '28.246'} | \n",
- " SE-BD | \n",
- " Damaging Wind | \n",
- " 6 | \n",
- " 2018-02-11 00:42:06 | \n",
- "
\n",
- " \n",
- " 13039 | \n",
- " 1014.052787 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Vienna International Airport | \n",
- " VIE | \n",
- " Vienna | \n",
- " AT | \n",
- " {'lat': '48.11029816', 'lon': '16.56970024'} | \n",
- " AT-9 | \n",
- " Thunder & Lightning | \n",
- " ... | \n",
- " 690.092327 | \n",
- " Montreal / Pierre Elliott Trudeau Internationa... | \n",
- " YUL | \n",
- " Montreal | \n",
- " CA | \n",
- " {'lat': '45.47060013', 'lon': '-73.74079895'} | \n",
- " CA-QC | \n",
- " Thunder & Lightning | \n",
- " 6 | \n",
- " 2018-02-11 10:56:31 | \n",
- "
\n",
- " \n",
- " 13040 | \n",
- " 455.243843 | \n",
- " False | \n",
- " ES-Air | \n",
- " London Luton Airport | \n",
- " LTN | \n",
- " London | \n",
- " GB | \n",
- " {'lat': '51.87469864', 'lon': '-0.368333012'} | \n",
- " GB-ENG | \n",
- " Cloudy | \n",
- " ... | \n",
- " 3.028293 | \n",
- " London Heathrow Airport | \n",
- " LHR | \n",
- " London | \n",
- " GB | \n",
- " {'lat': '51.4706', 'lon': '-0.461941'} | \n",
- " GB-ENG | \n",
- " Clear | \n",
- " 6 | \n",
- " 2018-02-11 00:39:37 | \n",
- "
\n",
- " \n",
- " 13041 | \n",
- " 611.370232 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Jorge Chavez International Airport | \n",
- " LIM | \n",
- " Lima | \n",
- " PE | \n",
- " {'lat': '-12.0219', 'lon': '-77.114304'} | \n",
- " SE-BD | \n",
- " Sunny | \n",
- " ... | \n",
- " 338.875531 | \n",
- " Casper-Natrona County International Airport | \n",
- " CPR | \n",
- " Casper | \n",
- " US | \n",
- " {'lat': '42.90800095', 'lon': '-106.4639969'} | \n",
- " US-WY | \n",
- " Rain | \n",
- " 6 | \n",
- " 2018-02-11 10:24:30 | \n",
- "
\n",
- " \n",
- " 13042 | \n",
- " 595.961285 | \n",
- " False | \n",
- " JetBeats | \n",
- " Ottawa Macdonald-Cartier International Airport | \n",
- " YOW | \n",
- " Ottawa | \n",
- " CA | \n",
- " {'lat': '45.32249832', 'lon': '-75.66919708'} | \n",
- " CA-ON | \n",
- " Clear | \n",
- " ... | \n",
- " 375.129587 | \n",
- " Frankfurt am Main Airport | \n",
- " FRA | \n",
- " Frankfurt am Main | \n",
- " DE | \n",
- " {'lat': '50.033333', 'lon': '8.570556'} | \n",
- " DE-HE | \n",
- " Clear | \n",
- " 6 | \n",
- " 2018-02-11 09:02:07 | \n",
- "
\n",
- " \n",
- " 13043 | \n",
- " 782.747648 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Xi'an Xianyang International Airport | \n",
- " XIY | \n",
- " Xi'an | \n",
- " CN | \n",
- " {'lat': '34.447102', 'lon': '108.751999'} | \n",
- " SE-BD | \n",
- " Clear | \n",
- " ... | \n",
- " 156.858481 | \n",
- " Tokyo Haneda International Airport | \n",
- " HND | \n",
- " Tokyo | \n",
- " JP | \n",
- " {'lat': '35.552299', 'lon': '139.779999'} | \n",
- " SE-BD | \n",
- " Thunder & Lightning | \n",
- " 6 | \n",
- " 2018-02-11 04:45:06 | \n",
- "
\n",
- " \n",
- " 13044 | \n",
- " 891.117221 | \n",
- " False | \n",
- " JetBeats | \n",
- " Winnipeg / James Armstrong Richardson Internat... | \n",
- " YWG | \n",
- " Winnipeg | \n",
- " CA | \n",
- " {'lat': '49.90999985', 'lon': '-97.23989868'} | \n",
- " CA-MB | \n",
- " Clear | \n",
- " ... | \n",
- " 354.106457 | \n",
- " Vienna International Airport | \n",
- " VIE | \n",
- " Vienna | \n",
- " AT | \n",
- " {'lat': '48.11029816', 'lon': '16.56970024'} | \n",
- " AT-9 | \n",
- " Thunder & Lightning | \n",
- " 6 | \n",
- " 2018-02-11 00:51:14 | \n",
- "
\n",
- " \n",
- " 13045 | \n",
- " 587.169921 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Brisbane International Airport | \n",
- " BNE | \n",
- " Brisbane | \n",
- " AU | \n",
- " {'lat': '-27.38419914', 'lon': '153.1170044'} | \n",
- " SE-BD | \n",
- " Rain | \n",
- " ... | \n",
- " 771.305442 | \n",
- " Amsterdam Airport Schiphol | \n",
- " AMS | \n",
- " Amsterdam | \n",
- " NL | \n",
- " {'lat': '52.30860138', 'lon': '4.76388979'} | \n",
- " NL-NH | \n",
- " Sunny | \n",
- " 6 | \n",
- " 2018-02-11 05:41:51 | \n",
- "
\n",
- " \n",
- " 13046 | \n",
- " 739.132165 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Xi'an Xianyang International Airport | \n",
- " XIY | \n",
- " Xi'an | \n",
- " CN | \n",
- " {'lat': '34.447102', 'lon': '108.751999'} | \n",
- " SE-BD | \n",
- " Rain | \n",
- " ... | \n",
- " 542.955572 | \n",
- " Winnipeg / James Armstrong Richardson Internat... | \n",
- " YWG | \n",
- " Winnipeg | \n",
- " CA | \n",
- " {'lat': '49.90999985', 'lon': '-97.23989868'} | \n",
- " CA-MB | \n",
- " Hail | \n",
- " 6 | \n",
- " 2018-02-11 10:02:21 | \n",
- "
\n",
- " \n",
- " 13047 | \n",
- " 605.191876 | \n",
- " False | \n",
- " JetBeats | \n",
- " Portland International Jetport Airport | \n",
- " PWM | \n",
- " Portland | \n",
- " US | \n",
- " {'lat': '43.64619827', 'lon': '-70.30930328'} | \n",
- " US-ME | \n",
- " Thunder & Lightning | \n",
- " ... | \n",
- " 564.599857 | \n",
- " Jeju International Airport | \n",
- " CJU | \n",
- " Jeju City | \n",
- " KR | \n",
- " {'lat': '33.51129913', 'lon': '126.4929962'} | \n",
- " SE-BD | \n",
- " Cloudy | \n",
- " 6 | \n",
- " 2018-02-11 15:55:10 | \n",
- "
\n",
- " \n",
- " 13048 | \n",
- " 361.767659 | \n",
- " True | \n",
- " Logstash Airways | \n",
- " Dubai International Airport | \n",
- " DXB | \n",
- " Dubai | \n",
- " AE | \n",
- " {'lat': '25.25279999', 'lon': '55.36439896'} | \n",
- " SE-BD | \n",
- " Sunny | \n",
- " ... | \n",
- " 180.000000 | \n",
- " Dubai International Airport | \n",
- " DXB | \n",
- " Dubai | \n",
- " AE | \n",
- " {'lat': '25.25279999', 'lon': '55.36439896'} | \n",
- " SE-BD | \n",
- " Hail | \n",
- " 6 | \n",
- " 2018-02-11 04:11:14 | \n",
- "
\n",
- " \n",
- " 13049 | \n",
- " 662.306992 | \n",
- " False | \n",
- " ES-Air | \n",
- " Winnipeg / James Armstrong Richardson Internat... | \n",
- " YWG | \n",
- " Winnipeg | \n",
- " CA | \n",
- " {'lat': '49.90999985', 'lon': '-97.23989868'} | \n",
- " CA-MB | \n",
- " Heavy Fog | \n",
- " ... | \n",
- " 835.954429 | \n",
- " Ministro Pistarini International Airport | \n",
- " EZE | \n",
- " Buenos Aires | \n",
- " AR | \n",
- " {'lat': '-34.8222', 'lon': '-58.5358'} | \n",
- " AR-B | \n",
- " Sunny | \n",
- " 6 | \n",
- " 2018-02-11 10:13:32 | \n",
- "
\n",
- " \n",
- " 13050 | \n",
- " 630.779526 | \n",
- " False | \n",
- " JetBeats | \n",
- " Helsinki Vantaa Airport | \n",
- " HEL | \n",
- " Helsinki | \n",
- " FI | \n",
- " {'lat': '60.31719971', 'lon': '24.9633007'} | \n",
- " FI-ES | \n",
- " Sunny | \n",
- " ... | \n",
- " 451.755639 | \n",
- " Beijing Capital International Airport | \n",
- " PEK | \n",
- " Beijing | \n",
- " CN | \n",
- " {'lat': '40.08010101', 'lon': '116.5849991'} | \n",
- " SE-BD | \n",
- " Cloudy | \n",
- " 6 | \n",
- " 2018-02-11 11:23:23 | \n",
- "
\n",
- " \n",
- " 13051 | \n",
- " 937.771279 | \n",
- " True | \n",
- " Logstash Airways | \n",
- " Lester B. Pearson International Airport | \n",
- " YYZ | \n",
- " Toronto | \n",
- " CA | \n",
- " {'lat': '43.67720032', 'lon': '-79.63059998'} | \n",
- " CA-ON | \n",
- " Sunny | \n",
- " ... | \n",
- " 507.451571 | \n",
- " Leonardo da Vinci___Fiumicino Airport | \n",
- " RM11 | \n",
- " Rome | \n",
- " IT | \n",
- " {'lat': '41.8002778', 'lon': '12.2388889'} | \n",
- " IT-62 | \n",
- " Hail | \n",
- " 6 | \n",
- " 2018-02-11 01:13:50 | \n",
- "
\n",
- " \n",
- " 13052 | \n",
- " 1085.155339 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Melbourne International Airport | \n",
- " MEL | \n",
- " Melbourne | \n",
- " AU | \n",
- " {'lat': '-37.673302', 'lon': '144.843002'} | \n",
- " SE-BD | \n",
- " Cloudy | \n",
- " ... | \n",
- " 1044.451122 | \n",
- " Bologna Guglielmo Marconi Airport | \n",
- " BO08 | \n",
- " Bologna | \n",
- " IT | \n",
- " {'lat': '44.5354', 'lon': '11.2887'} | \n",
- " IT-45 | \n",
- " Cloudy | \n",
- " 6 | \n",
- " 2018-02-11 18:35:42 | \n",
- "
\n",
- " \n",
- " 13053 | \n",
- " 1191.964104 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Zurich Airport | \n",
- " ZRH | \n",
- " Zurich | \n",
- " CH | \n",
- " {'lat': '47.464699', 'lon': '8.54917'} | \n",
- " CH-ZH | \n",
- " Hail | \n",
- " ... | \n",
- " 728.715904 | \n",
- " Portland International Jetport Airport | \n",
- " PWM | \n",
- " Portland | \n",
- " US | \n",
- " {'lat': '43.64619827', 'lon': '-70.30930328'} | \n",
- " US-ME | \n",
- " Clear | \n",
- " 6 | \n",
- " 2018-02-11 19:02:10 | \n",
- "
\n",
- " \n",
- " 13054 | \n",
- " 1080.446279 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Xi'an Xianyang International Airport | \n",
- " XIY | \n",
- " Xi'an | \n",
- " CN | \n",
- " {'lat': '34.447102', 'lon': '108.751999'} | \n",
- " SE-BD | \n",
- " Rain | \n",
- " ... | \n",
- " 402.929088 | \n",
- " Pisa International Airport | \n",
- " PI05 | \n",
- " Pisa | \n",
- " IT | \n",
- " {'lat': '43.683899', 'lon': '10.3927'} | \n",
- " IT-52 | \n",
- " Sunny | \n",
- " 6 | \n",
- " 2018-02-11 20:42:25 | \n",
- "
\n",
- " \n",
- " 13055 | \n",
- " 646.612941 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Zurich Airport | \n",
- " ZRH | \n",
- " Zurich | \n",
- " CH | \n",
- " {'lat': '47.464699', 'lon': '8.54917'} | \n",
- " CH-ZH | \n",
- " Rain | \n",
- " ... | \n",
- " 644.418029 | \n",
- " Winnipeg / James Armstrong Richardson Internat... | \n",
- " YWG | \n",
- " Winnipeg | \n",
- " CA | \n",
- " {'lat': '49.90999985', 'lon': '-97.23989868'} | \n",
- " CA-MB | \n",
- " Rain | \n",
- " 6 | \n",
- " 2018-02-11 01:41:57 | \n",
- "
\n",
- " \n",
- " 13056 | \n",
- " 997.751876 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Ukrainka Air Base | \n",
- " XHBU | \n",
- " Belogorsk | \n",
- " RU | \n",
- " {'lat': '51.169997', 'lon': '128.445007'} | \n",
- " RU-AMU | \n",
- " Rain | \n",
- " ... | \n",
- " 937.540811 | \n",
- " Licenciado Benito Juarez International Airport | \n",
- " AICM | \n",
- " Mexico City | \n",
- " MX | \n",
- " {'lat': '19.4363', 'lon': '-99.072098'} | \n",
- " MX-DIF | \n",
- " Sunny | \n",
- " 6 | \n",
- " 2018-02-11 04:09:27 | \n",
- "
\n",
- " \n",
- " 13057 | \n",
- " 1102.814465 | \n",
- " False | \n",
- " JetBeats | \n",
- " Ministro Pistarini International Airport | \n",
- " EZE | \n",
- " Buenos Aires | \n",
- " AR | \n",
- " {'lat': '-34.8222', 'lon': '-58.5358'} | \n",
- " SE-BD | \n",
- " Hail | \n",
- " ... | \n",
- " 1697.404971 | \n",
- " Itami Airport | \n",
- " ITM | \n",
- " Osaka | \n",
- " JP | \n",
- " {'lat': '34.78549957', 'lon': '135.4380035'} | \n",
- " SE-BD | \n",
- " Hail | \n",
- " 6 | \n",
- " 2018-02-11 08:28:21 | \n",
- "
\n",
- " \n",
- " 13058 | \n",
- " 858.144337 | \n",
- " False | \n",
- " JetBeats | \n",
- " Washington Dulles International Airport | \n",
- " IAD | \n",
- " Washington | \n",
- " US | \n",
- " {'lat': '38.94449997', 'lon': '-77.45580292'} | \n",
- " US-DC | \n",
- " Heavy Fog | \n",
- " ... | \n",
- " 1610.761827 | \n",
- " Adelaide International Airport | \n",
- " ADL | \n",
- " Adelaide | \n",
- " AU | \n",
- " {'lat': '-34.945', 'lon': '138.531006'} | \n",
- " SE-BD | \n",
- " Rain | \n",
- " 6 | \n",
- " 2018-02-11 14:54:34 | \n",
- "
\n",
- " \n",
- "
\n",
- "
13059 rows × 27 columns
\n",
- "
"
- ],
- "text/plain": [
- " AvgTicketPrice Cancelled Carrier \\\n",
- "0 841.265642 False Kibana Airlines \n",
- "1 882.982662 False Logstash Airways \n",
- "2 190.636904 False Logstash Airways \n",
- "3 181.694216 True Kibana Airlines \n",
- "4 730.041778 False Kibana Airlines \n",
- "5 418.152089 False JetBeats \n",
- "6 180.246816 False JetBeats \n",
- "7 585.184310 False Kibana Airlines \n",
- "8 960.869736 True Kibana Airlines \n",
- "9 296.877773 False Logstash Airways \n",
- "10 906.437948 False JetBeats \n",
- "11 704.463771 False Logstash Airways \n",
- "12 922.499077 True Logstash Airways \n",
- "13 374.959276 False Logstash Airways \n",
- "14 552.917371 False Logstash Airways \n",
- "15 566.487557 True Kibana Airlines \n",
- "16 989.952787 True Logstash Airways \n",
- "17 569.613255 False ES-Air \n",
- "18 277.429707 False ES-Air \n",
- "19 772.100846 False JetBeats \n",
- "20 167.599922 False JetBeats \n",
- "21 253.210065 False ES-Air \n",
- "22 917.247620 False JetBeats \n",
- "23 451.591176 False Logstash Airways \n",
- "24 307.067201 False Logstash Airways \n",
- "25 268.241596 False ES-Air \n",
- "26 975.812632 True Kibana Airlines \n",
- "27 134.214546 False JetBeats \n",
- "28 988.897564 False Kibana Airlines \n",
- "29 511.067220 False Logstash Airways \n",
- "... ... ... ... \n",
- "13029 795.905278 False Kibana Airlines \n",
- "13030 863.388068 False Logstash Airways \n",
- "13031 575.183008 False JetBeats \n",
- "13032 817.368952 False JetBeats \n",
- "13033 579.582455 False ES-Air \n",
- "13034 1004.916638 False JetBeats \n",
- "13035 357.562842 True Logstash Airways \n",
- "13036 429.580539 False Logstash Airways \n",
- "13037 729.788171 True ES-Air \n",
- "13038 564.897695 False ES-Air \n",
- "13039 1014.052787 False Logstash Airways \n",
- "13040 455.243843 False ES-Air \n",
- "13041 611.370232 False Logstash Airways \n",
- "13042 595.961285 False JetBeats \n",
- "13043 782.747648 False Logstash Airways \n",
- "13044 891.117221 False JetBeats \n",
- "13045 587.169921 False Logstash Airways \n",
- "13046 739.132165 False Logstash Airways \n",
- "13047 605.191876 False JetBeats \n",
- "13048 361.767659 True Logstash Airways \n",
- "13049 662.306992 False ES-Air \n",
- "13050 630.779526 False JetBeats \n",
- "13051 937.771279 True Logstash Airways \n",
- "13052 1085.155339 False Logstash Airways \n",
- "13053 1191.964104 False Logstash Airways \n",
- "13054 1080.446279 False Logstash Airways \n",
- "13055 646.612941 False Logstash Airways \n",
- "13056 997.751876 False Logstash Airways \n",
- "13057 1102.814465 False JetBeats \n",
- "13058 858.144337 False JetBeats \n",
- "\n",
- " Dest DestAirportID \\\n",
- "0 Sydney Kingsford Smith International Airport SYD \n",
- "1 Venice Marco Polo Airport VE05 \n",
- "2 Venice Marco Polo Airport VE05 \n",
- "3 Treviso-Sant'Angelo Airport TV01 \n",
- "4 Xi'an Xianyang International Airport XIY \n",
- "5 Genoa Cristoforo Colombo Airport GE01 \n",
- "6 Zurich Airport ZRH \n",
- "7 Ottawa Macdonald-Cartier International Airport YOW \n",
- "8 Rajiv Gandhi International Airport HYD \n",
- "9 Treviso-Sant'Angelo Airport TV01 \n",
- "10 Helsinki Vantaa Airport HEL \n",
- "11 Vienna International Airport VIE \n",
- "12 Shanghai Pudong International Airport PVG \n",
- "13 Ottawa Macdonald-Cartier International Airport YOW \n",
- "14 Luis Munoz Marin International Airport SJU \n",
- "15 Cologne Bonn Airport CGN \n",
- "16 Venice Marco Polo Airport VE05 \n",
- "17 Ministro Pistarini International Airport EZE \n",
- "18 Shanghai Pudong International Airport PVG \n",
- "19 Indira Gandhi International Airport DEL \n",
- "20 Wichita Mid Continent Airport ICT \n",
- "21 Ottawa Macdonald-Cartier International Airport YOW \n",
- "22 Itami Airport ITM \n",
- "23 Vienna International Airport VIE \n",
- "24 Charles de Gaulle International Airport CDG \n",
- "25 Narita International Airport NRT \n",
- "26 Itami Airport ITM \n",
- "27 San Diego International Airport SAN \n",
- "28 Verona Villafranca Airport VR10 \n",
- "29 Zurich Airport ZRH \n",
- "... ... ... \n",
- "13029 Malpensa International Airport MI12 \n",
- "13030 Xi'an Xianyang International Airport XIY \n",
- "13031 Savannah Hilton Head International Airport SAV \n",
- "13032 Syracuse Hancock International Airport SYR \n",
- "13033 Tampa International Airport TPA \n",
- "13034 Olenya Air Base XLMO \n",
- "13035 Shanghai Pudong International Airport PVG \n",
- "13036 Venice Marco Polo Airport VE05 \n",
- "13037 Vienna International Airport VIE \n",
- "13038 Pisa International Airport PI05 \n",
- "13039 Vienna International Airport VIE \n",
- "13040 London Luton Airport LTN \n",
- "13041 Jorge Chavez International Airport LIM \n",
- "13042 Ottawa Macdonald-Cartier International Airport YOW \n",
- "13043 Xi'an Xianyang International Airport XIY \n",
- "13044 Winnipeg / James Armstrong Richardson Internat... YWG \n",
- "13045 Brisbane International Airport BNE \n",
- "13046 Xi'an Xianyang International Airport XIY \n",
- "13047 Portland International Jetport Airport PWM \n",
- "13048 Dubai International Airport DXB \n",
- "13049 Winnipeg / James Armstrong Richardson Internat... YWG \n",
- "13050 Helsinki Vantaa Airport HEL \n",
- "13051 Lester B. Pearson International Airport YYZ \n",
- "13052 Melbourne International Airport MEL \n",
- "13053 Zurich Airport ZRH \n",
- "13054 Xi'an Xianyang International Airport XIY \n",
- "13055 Zurich Airport ZRH \n",
- "13056 Ukrainka Air Base XHBU \n",
- "13057 Ministro Pistarini International Airport EZE \n",
- "13058 Washington Dulles International Airport IAD \n",
- "\n",
- " DestCityName DestCountry \\\n",
- "0 Sydney AU \n",
- "1 Venice IT \n",
- "2 Venice IT \n",
- "3 Treviso IT \n",
- "4 Xi'an CN \n",
- "5 Genova IT \n",
- "6 Zurich CH \n",
- "7 Ottawa CA \n",
- "8 Hyderabad IN \n",
- "9 Treviso IT \n",
- "10 Helsinki FI \n",
- "11 Vienna AT \n",
- "12 Shanghai CN \n",
- "13 Ottawa CA \n",
- "14 San Juan PR \n",
- "15 Cologne DE \n",
- "16 Venice IT \n",
- "17 Buenos Aires AR \n",
- "18 Shanghai CN \n",
- "19 New Delhi IN \n",
- "20 Wichita US \n",
- "21 Ottawa CA \n",
- "22 Osaka JP \n",
- "23 Vienna AT \n",
- "24 Paris FR \n",
- "25 Tokyo JP \n",
- "26 Osaka JP \n",
- "27 San Diego US \n",
- "28 Verona IT \n",
- "29 Zurich CH \n",
- "... ... ... \n",
- "13029 Milan IT \n",
- "13030 Xi'an CN \n",
- "13031 Savannah US \n",
- "13032 Syracuse US \n",
- "13033 Tampa US \n",
- "13034 Olenegorsk RU \n",
- "13035 Shanghai CN \n",
- "13036 Venice IT \n",
- "13037 Vienna AT \n",
- "13038 Pisa IT \n",
- "13039 Vienna AT \n",
- "13040 London GB \n",
- "13041 Lima PE \n",
- "13042 Ottawa CA \n",
- "13043 Xi'an CN \n",
- "13044 Winnipeg CA \n",
- "13045 Brisbane AU \n",
- "13046 Xi'an CN \n",
- "13047 Portland US \n",
- "13048 Dubai AE \n",
- "13049 Winnipeg CA \n",
- "13050 Helsinki FI \n",
- "13051 Toronto CA \n",
- "13052 Melbourne AU \n",
- "13053 Zurich CH \n",
- "13054 Xi'an CN \n",
- "13055 Zurich CH \n",
- "13056 Belogorsk RU \n",
- "13057 Buenos Aires AR \n",
- "13058 Washington US \n",
- "\n",
- " DestLocation DestRegion \\\n",
- "0 {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n",
- "1 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
- "2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
- "3 {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n",
- "4 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
- "5 {'lat': '44.4133', 'lon': '8.8375'} IT-42 \n",
- "6 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n",
- "7 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n",
- "8 {'lat': '17.23131752', 'lon': '78.42985535'} SE-BD \n",
- "9 {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n",
- "10 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n",
- "11 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n",
- "12 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n",
- "13 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n",
- "14 {'lat': '18.43939972', 'lon': '-66.00180054'} PR-U-A \n",
- "15 {'lat': '50.86589813', 'lon': '7.142739773'} DE-NW \n",
- "16 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
- "17 {'lat': '-34.8222', 'lon': '-58.5358'} SE-BD \n",
- "18 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n",
- "19 {'lat': '28.5665', 'lon': '77.103104'} SE-BD \n",
- "20 {'lat': '37.64989853', 'lon': '-97.43309784'} US-KS \n",
- "21 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n",
- "22 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n",
- "23 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n",
- "24 {'lat': '49.01279831', 'lon': '2.549999952'} FR-J \n",
- "25 {'lat': '35.76470184', 'lon': '140.3860016'} SE-BD \n",
- "26 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n",
- "27 {'lat': '32.73360062', 'lon': '-117.1900024'} US-CA \n",
- "28 {'lat': '45.395699', 'lon': '10.8885'} IT-34 \n",
- "29 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n",
- "... ... ... \n",
- "13029 {'lat': '45.6306', 'lon': '8.72811'} IT-25 \n",
- "13030 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
- "13031 {'lat': '32.12760162', 'lon': '-81.20210266'} US-GA \n",
- "13032 {'lat': '43.11119843', 'lon': '-76.10630035'} US-NY \n",
- "13033 {'lat': '27.97550011', 'lon': '-82.53320313'} US-FL \n",
- "13034 {'lat': '68.15180206', 'lon': '33.46390152'} RU-MUR \n",
- "13035 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n",
- "13036 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
- "13037 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n",
- "13038 {'lat': '43.683899', 'lon': '10.3927'} IT-52 \n",
- "13039 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n",
- "13040 {'lat': '51.87469864', 'lon': '-0.368333012'} GB-ENG \n",
- "13041 {'lat': '-12.0219', 'lon': '-77.114304'} SE-BD \n",
- "13042 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n",
- "13043 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
- "13044 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n",
- "13045 {'lat': '-27.38419914', 'lon': '153.1170044'} SE-BD \n",
- "13046 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
- "13047 {'lat': '43.64619827', 'lon': '-70.30930328'} US-ME \n",
- "13048 {'lat': '25.25279999', 'lon': '55.36439896'} SE-BD \n",
- "13049 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n",
- "13050 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n",
- "13051 {'lat': '43.67720032', 'lon': '-79.63059998'} CA-ON \n",
- "13052 {'lat': '-37.673302', 'lon': '144.843002'} SE-BD \n",
- "13053 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n",
- "13054 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
- "13055 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n",
- "13056 {'lat': '51.169997', 'lon': '128.445007'} RU-AMU \n",
- "13057 {'lat': '-34.8222', 'lon': '-58.5358'} SE-BD \n",
- "13058 {'lat': '38.94449997', 'lon': '-77.45580292'} US-DC \n",
- "\n",
- " DestWeather ... FlightTimeMin \\\n",
- "0 Rain ... 1030.770416 \n",
- "1 Sunny ... 464.389481 \n",
- "2 Cloudy ... 0.000000 \n",
- "3 Clear ... 222.749059 \n",
- "4 Clear ... 785.779071 \n",
- "5 Thunder & Lightning ... 393.590441 \n",
- "6 Hail ... 300.000000 \n",
- "7 Clear ... 614.942480 \n",
- "8 Cloudy ... 602.030591 \n",
- "9 Rain ... 174.822216 \n",
- "10 Rain ... 503.045170 \n",
- "11 Cloudy ... 36.075018 \n",
- "12 Clear ... 679.768391 \n",
- "13 Rain ... 330.418282 \n",
- "14 Clear ... 407.145031 \n",
- "15 Sunny ... 656.712658 \n",
- "16 Damaging Wind ... 773.030334 \n",
- "17 Cloudy ... 704.716920 \n",
- "18 Clear ... 355.957996 \n",
- "19 Clear ... 875.114675 \n",
- "20 Clear ... 373.966883 \n",
- "21 Hail ... 130.667700 \n",
- "22 Damaging Wind ... 574.495310 \n",
- "23 Heavy Fog ... 579.728943 \n",
- "24 Clear ... 50.157229 \n",
- "25 Rain ... 527.567422 \n",
- "26 Hail ... 386.259764 \n",
- "27 Clear ... 24.479650 \n",
- "28 Sunny ... 568.351033 \n",
- "29 Rain ... 425.889194 \n",
- "... ... ... ... \n",
- "13029 Sunny ... 534.375826 \n",
- "13030 Damaging Wind ... 141.172633 \n",
- "13031 Thunder & Lightning ... 1113.137060 \n",
- "13032 Rain ... 714.964864 \n",
- "13033 Rain ... 234.929046 \n",
- "13034 Clear ... 526.895776 \n",
- "13035 Thunder & Lightning ... 0.000000 \n",
- "13036 Sunny ... 150.000000 \n",
- "13037 Rain ... 691.944839 \n",
- "13038 Heavy Fog ... 567.387339 \n",
- "13039 Thunder & Lightning ... 690.092327 \n",
- "13040 Cloudy ... 3.028293 \n",
- "13041 Sunny ... 338.875531 \n",
- "13042 Clear ... 375.129587 \n",
- "13043 Clear ... 156.858481 \n",
- "13044 Clear ... 354.106457 \n",
- "13045 Rain ... 771.305442 \n",
- "13046 Rain ... 542.955572 \n",
- "13047 Thunder & Lightning ... 564.599857 \n",
- "13048 Sunny ... 180.000000 \n",
- "13049 Heavy Fog ... 835.954429 \n",
- "13050 Sunny ... 451.755639 \n",
- "13051 Sunny ... 507.451571 \n",
- "13052 Cloudy ... 1044.451122 \n",
- "13053 Hail ... 728.715904 \n",
- "13054 Rain ... 402.929088 \n",
- "13055 Rain ... 644.418029 \n",
- "13056 Rain ... 937.540811 \n",
- "13057 Hail ... 1697.404971 \n",
- "13058 Heavy Fog ... 1610.761827 \n",
- "\n",
- " Origin OriginAirportID \\\n",
- "0 Frankfurt am Main Airport FRA \n",
- "1 Cape Town International Airport CPT \n",
- "2 Venice Marco Polo Airport VE05 \n",
- "3 Naples International Airport NA01 \n",
- "4 Licenciado Benito Juarez International Airport AICM \n",
- "5 Edmonton International Airport CYEG \n",
- "6 Zurich Airport ZRH \n",
- "7 Ciampino___G. B. Pastine International Airport RM12 \n",
- "8 Milano Linate Airport MI11 \n",
- "9 Sheremetyevo International Airport SVO \n",
- "10 Albuquerque International Sunport Airport ABQ \n",
- "11 Venice Marco Polo Airport VE05 \n",
- "12 Licenciado Benito Juarez International Airport AICM \n",
- "13 Naples International Airport NA01 \n",
- "14 Ciampino___G. B. Pastine International Airport RM12 \n",
- "15 Chengdu Shuangliu International Airport CTU \n",
- "16 Licenciado Benito Juarez International Airport AICM \n",
- "17 Cleveland Hopkins International Airport CLE \n",
- "18 Olenya Air Base XLMO \n",
- "19 Casper-Natrona County International Airport CPR \n",
- "20 Erie International Tom Ridge Field ERI \n",
- "21 Newark Liberty International Airport EWR \n",
- "22 Copenhagen Kastrup Airport CPH \n",
- "23 Seattle Tacoma International Airport SEA \n",
- "24 Berlin-Tegel Airport TXL \n",
- "25 Manchester Airport MAN \n",
- "26 Helsinki Vantaa Airport HEL \n",
- "27 Phoenix Sky Harbor International Airport PHX \n",
- "28 New Chitose Airport CTS \n",
- "29 Tulsa International Airport TUL \n",
- "... ... ... \n",
- "13029 Itami Airport ITM \n",
- "13030 Tokyo Haneda International Airport HND \n",
- "13031 OR Tambo International Airport JNB \n",
- "13032 El Dorado International Airport BOG \n",
- "13033 Jorge Chavez International Airport LIM \n",
- "13034 Gimpo International Airport GMP \n",
- "13035 Shanghai Pudong International Airport PVG \n",
- "13036 Venice Marco Polo Airport VE05 \n",
- "13037 Ukrainka Air Base XHBU \n",
- "13038 OR Tambo International Airport JNB \n",
- "13039 Montreal / Pierre Elliott Trudeau Internationa... YUL \n",
- "13040 London Heathrow Airport LHR \n",
- "13041 Casper-Natrona County International Airport CPR \n",
- "13042 Frankfurt am Main Airport FRA \n",
- "13043 Tokyo Haneda International Airport HND \n",
- "13044 Vienna International Airport VIE \n",
- "13045 Amsterdam Airport Schiphol AMS \n",
- "13046 Winnipeg / James Armstrong Richardson Internat... YWG \n",
- "13047 Jeju International Airport CJU \n",
- "13048 Dubai International Airport DXB \n",
- "13049 Ministro Pistarini International Airport EZE \n",
- "13050 Beijing Capital International Airport PEK \n",
- "13051 Leonardo da Vinci___Fiumicino Airport RM11 \n",
- "13052 Bologna Guglielmo Marconi Airport BO08 \n",
- "13053 Portland International Jetport Airport PWM \n",
- "13054 Pisa International Airport PI05 \n",
- "13055 Winnipeg / James Armstrong Richardson Internat... YWG \n",
- "13056 Licenciado Benito Juarez International Airport AICM \n",
- "13057 Itami Airport ITM \n",
- "13058 Adelaide International Airport ADL \n",
- "\n",
- " OriginCityName OriginCountry \\\n",
- "0 Frankfurt am Main DE \n",
- "1 Cape Town ZA \n",
- "2 Venice IT \n",
- "3 Naples IT \n",
- "4 Mexico City MX \n",
- "5 Edmonton CA \n",
- "6 Zurich CH \n",
- "7 Rome IT \n",
- "8 Milan IT \n",
- "9 Moscow RU \n",
- "10 Albuquerque US \n",
- "11 Venice IT \n",
- "12 Mexico City MX \n",
- "13 Naples IT \n",
- "14 Rome IT \n",
- "15 Chengdu CN \n",
- "16 Mexico City MX \n",
- "17 Cleveland US \n",
- "18 Olenegorsk RU \n",
- "19 Casper US \n",
- "20 Erie US \n",
- "21 Newark US \n",
- "22 Copenhagen DK \n",
- "23 Seattle US \n",
- "24 Berlin DE \n",
- "25 Manchester GB \n",
- "26 Helsinki FI \n",
- "27 Phoenix US \n",
- "28 Chitose / Tomakomai JP \n",
- "29 Tulsa US \n",
- "... ... ... \n",
- "13029 Osaka JP \n",
- "13030 Tokyo JP \n",
- "13031 Johannesburg ZA \n",
- "13032 Bogota CO \n",
- "13033 Lima PE \n",
- "13034 Seoul KR \n",
- "13035 Shanghai CN \n",
- "13036 Venice IT \n",
- "13037 Belogorsk RU \n",
- "13038 Johannesburg ZA \n",
- "13039 Montreal CA \n",
- "13040 London GB \n",
- "13041 Casper US \n",
- "13042 Frankfurt am Main DE \n",
- "13043 Tokyo JP \n",
- "13044 Vienna AT \n",
- "13045 Amsterdam NL \n",
- "13046 Winnipeg CA \n",
- "13047 Jeju City KR \n",
- "13048 Dubai AE \n",
- "13049 Buenos Aires AR \n",
- "13050 Beijing CN \n",
- "13051 Rome IT \n",
- "13052 Bologna IT \n",
- "13053 Portland US \n",
- "13054 Pisa IT \n",
- "13055 Winnipeg CA \n",
- "13056 Mexico City MX \n",
- "13057 Osaka JP \n",
- "13058 Adelaide AU \n",
- "\n",
- " OriginLocation OriginRegion \\\n",
- "0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n",
- "1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n",
- "2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
- "3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n",
- "4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
- "5 {'lat': '53.30970001', 'lon': '-113.5800018'} CA-AB \n",
- "6 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n",
- "7 {'lat': '41.7994', 'lon': '12.5949'} IT-62 \n",
- "8 {'lat': '45.445099', 'lon': '9.27674'} IT-25 \n",
- "9 {'lat': '55.972599', 'lon': '37.4146'} RU-MOS \n",
- "10 {'lat': '35.040199', 'lon': '-106.609001'} US-NM \n",
- "11 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
- "12 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
- "13 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n",
- "14 {'lat': '41.7994', 'lon': '12.5949'} IT-62 \n",
- "15 {'lat': '30.57850075', 'lon': '103.9469986'} SE-BD \n",
- "16 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
- "17 {'lat': '41.4117012', 'lon': '-81.84980011'} US-OH \n",
- "18 {'lat': '68.15180206', 'lon': '33.46390152'} RU-MUR \n",
- "19 {'lat': '42.90800095', 'lon': '-106.4639969'} US-WY \n",
- "20 {'lat': '42.08312701', 'lon': '-80.17386675'} US-PA \n",
- "21 {'lat': '40.69250107', 'lon': '-74.16870117'} US-NJ \n",
- "22 {'lat': '55.61790085', 'lon': '12.65600014'} DK-84 \n",
- "23 {'lat': '47.44900131', 'lon': '-122.3089981'} US-WA \n",
- "24 {'lat': '52.5597', 'lon': '13.2877'} DE-BE \n",
- "25 {'lat': '53.35369873', 'lon': '-2.274950027'} GB-ENG \n",
- "26 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n",
- "27 {'lat': '33.43429947', 'lon': '-112.012001'} US-AZ \n",
- "28 {'lat': '42.77519989', 'lon': '141.6920013'} SE-BD \n",
- "29 {'lat': '36.19839859', 'lon': '-95.88809967'} US-OK \n",
- "... ... ... \n",
- "13029 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n",
- "13030 {'lat': '35.552299', 'lon': '139.779999'} SE-BD \n",
- "13031 {'lat': '-26.1392', 'lon': '28.246'} SE-BD \n",
- "13032 {'lat': '4.70159', 'lon': '-74.1469'} CO-CUN \n",
- "13033 {'lat': '-12.0219', 'lon': '-77.114304'} SE-BD \n",
- "13034 {'lat': '37.5583', 'lon': '126.791'} SE-BD \n",
- "13035 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n",
- "13036 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
- "13037 {'lat': '51.169997', 'lon': '128.445007'} RU-AMU \n",
- "13038 {'lat': '-26.1392', 'lon': '28.246'} SE-BD \n",
- "13039 {'lat': '45.47060013', 'lon': '-73.74079895'} CA-QC \n",
- "13040 {'lat': '51.4706', 'lon': '-0.461941'} GB-ENG \n",
- "13041 {'lat': '42.90800095', 'lon': '-106.4639969'} US-WY \n",
- "13042 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n",
- "13043 {'lat': '35.552299', 'lon': '139.779999'} SE-BD \n",
- "13044 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n",
- "13045 {'lat': '52.30860138', 'lon': '4.76388979'} NL-NH \n",
- "13046 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n",
- "13047 {'lat': '33.51129913', 'lon': '126.4929962'} SE-BD \n",
- "13048 {'lat': '25.25279999', 'lon': '55.36439896'} SE-BD \n",
- "13049 {'lat': '-34.8222', 'lon': '-58.5358'} AR-B \n",
- "13050 {'lat': '40.08010101', 'lon': '116.5849991'} SE-BD \n",
- "13051 {'lat': '41.8002778', 'lon': '12.2388889'} IT-62 \n",
- "13052 {'lat': '44.5354', 'lon': '11.2887'} IT-45 \n",
- "13053 {'lat': '43.64619827', 'lon': '-70.30930328'} US-ME \n",
- "13054 {'lat': '43.683899', 'lon': '10.3927'} IT-52 \n",
- "13055 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n",
- "13056 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
- "13057 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n",
- "13058 {'lat': '-34.945', 'lon': '138.531006'} SE-BD \n",
- "\n",
- " OriginWeather dayOfWeek timestamp \n",
- "0 Sunny 0 2018-01-01 00:00:00 \n",
- "1 Clear 0 2018-01-01 18:27:00 \n",
- "2 Rain 0 2018-01-01 17:11:14 \n",
- "3 Thunder & Lightning 0 2018-01-01 10:33:28 \n",
- "4 Damaging Wind 0 2018-01-01 05:13:00 \n",
- "5 Rain 0 2018-01-01 01:43:03 \n",
- "6 Clear 0 2018-01-01 13:49:53 \n",
- "7 Thunder & Lightning 0 2018-01-01 04:54:59 \n",
- "8 Heavy Fog 0 2018-01-01 12:09:35 \n",
- "9 Cloudy 0 2018-01-01 12:09:35 \n",
- "10 Rain 0 2018-01-01 22:06:14 \n",
- "11 Rain 0 2018-01-01 11:52:34 \n",
- "12 Heavy Fog 0 2018-01-01 02:13:46 \n",
- "13 Rain 0 2018-01-01 14:21:13 \n",
- "14 Cloudy 0 2018-01-01 17:42:53 \n",
- "15 Thunder & Lightning 0 2018-01-01 19:55:32 \n",
- "16 Thunder & Lightning 0 2018-01-01 07:49:27 \n",
- "17 Rain 0 2018-01-01 01:30:47 \n",
- "18 Hail 0 2018-01-01 07:58:17 \n",
- "19 Cloudy 0 2018-01-01 00:02:06 \n",
- "20 Cloudy 0 2018-01-01 01:08:20 \n",
- "21 Clear 0 2018-01-01 01:08:20 \n",
- "22 Sunny 0 2018-01-01 07:48:35 \n",
- "23 Heavy Fog 0 2018-01-01 18:57:21 \n",
- "24 Rain 0 2018-01-01 13:18:25 \n",
- "25 Thunder & Lightning 0 2018-01-01 08:20:35 \n",
- "26 Rain 0 2018-01-01 15:38:32 \n",
- "27 Clear 0 2018-01-01 03:08:45 \n",
- "28 Damaging Wind 0 2018-01-01 01:16:59 \n",
- "29 Rain 0 2018-01-01 18:00:59 \n",
- "... ... ... ... \n",
- "13029 Sunny 6 2018-02-11 20:10:13 \n",
- "13030 Clear 6 2018-02-11 18:59:53 \n",
- "13031 Hail 6 2018-02-11 00:57:48 \n",
- "13032 Thunder & Lightning 6 2018-02-11 12:02:49 \n",
- "13033 Thunder & Lightning 6 2018-02-11 02:07:40 \n",
- "13034 Sunny 6 2018-02-11 00:35:04 \n",
- "13035 Thunder & Lightning 6 2018-02-11 11:19:12 \n",
- "13036 Cloudy 6 2018-02-11 15:07:11 \n",
- "13037 Damaging Wind 6 2018-02-11 10:24:42 \n",
- "13038 Damaging Wind 6 2018-02-11 00:42:06 \n",
- "13039 Thunder & Lightning 6 2018-02-11 10:56:31 \n",
- "13040 Clear 6 2018-02-11 00:39:37 \n",
- "13041 Rain 6 2018-02-11 10:24:30 \n",
- "13042 Clear 6 2018-02-11 09:02:07 \n",
- "13043 Thunder & Lightning 6 2018-02-11 04:45:06 \n",
- "13044 Thunder & Lightning 6 2018-02-11 00:51:14 \n",
- "13045 Sunny 6 2018-02-11 05:41:51 \n",
- "13046 Hail 6 2018-02-11 10:02:21 \n",
- "13047 Cloudy 6 2018-02-11 15:55:10 \n",
- "13048 Hail 6 2018-02-11 04:11:14 \n",
- "13049 Sunny 6 2018-02-11 10:13:32 \n",
- "13050 Cloudy 6 2018-02-11 11:23:23 \n",
- "13051 Hail 6 2018-02-11 01:13:50 \n",
- "13052 Cloudy 6 2018-02-11 18:35:42 \n",
- "13053 Clear 6 2018-02-11 19:02:10 \n",
- "13054 Sunny 6 2018-02-11 20:42:25 \n",
- "13055 Rain 6 2018-02-11 01:41:57 \n",
- "13056 Sunny 6 2018-02-11 04:09:27 \n",
- "13057 Hail 6 2018-02-11 08:28:21 \n",
- "13058 Rain 6 2018-02-11 14:54:34 \n",
- "\n",
- "[13059 rows x 27 columns]"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "pd_df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " AvgTicketPrice | \n",
- " Cancelled | \n",
- " Carrier | \n",
- " Dest | \n",
- " DestAirportID | \n",
- " DestCityName | \n",
- " DestCountry | \n",
- " DestLocation | \n",
- " DestRegion | \n",
- " DestWeather | \n",
- " ... | \n",
- " FlightTimeMin | \n",
- " Origin | \n",
- " OriginAirportID | \n",
- " OriginCityName | \n",
- " OriginCountry | \n",
- " OriginLocation | \n",
- " OriginRegion | \n",
- " OriginWeather | \n",
- " dayOfWeek | \n",
- " timestamp | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 841.265642 | \n",
- " False | \n",
- " Kibana Airlines | \n",
- " Sydney Kingsford Smith International Airport | \n",
- " SYD | \n",
- " Sydney | \n",
- " AU | \n",
- " {'lat': '-33.94609833', 'lon': '151.177002'} | \n",
- " SE-BD | \n",
- " Rain | \n",
- " ... | \n",
- " 1030.770416 | \n",
- " Frankfurt am Main Airport | \n",
- " FRA | \n",
- " Frankfurt am Main | \n",
- " DE | \n",
- " {'lat': '50.033333', 'lon': '8.570556'} | \n",
- " DE-HE | \n",
- " Sunny | \n",
- " 0 | \n",
- " 2018-01-01 00:00:00 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 882.982662 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Sunny | \n",
- " ... | \n",
- " 464.389481 | \n",
- " Cape Town International Airport | \n",
- " CPT | \n",
- " Cape Town | \n",
- " ZA | \n",
- " {'lat': '-33.96480179', 'lon': '18.60169983'} | \n",
- " SE-BD | \n",
- " Clear | \n",
- " 0 | \n",
- " 2018-01-01 18:27:00 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 190.636904 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Cloudy | \n",
- " ... | \n",
- " 0.000000 | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 17:11:14 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 181.694216 | \n",
- " True | \n",
- " Kibana Airlines | \n",
- " Treviso-Sant'Angelo Airport | \n",
- " TV01 | \n",
- " Treviso | \n",
- " IT | \n",
- " {'lat': '45.648399', 'lon': '12.1944'} | \n",
- " IT-34 | \n",
- " Clear | \n",
- " ... | \n",
- " 222.749059 | \n",
- " Naples International Airport | \n",
- " NA01 | \n",
- " Naples | \n",
- " IT | \n",
- " {'lat': '40.886002', 'lon': '14.2908'} | \n",
- " IT-72 | \n",
- " Thunder & Lightning | \n",
- " 0 | \n",
- " 2018-01-01 10:33:28 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 730.041778 | \n",
- " False | \n",
- " Kibana Airlines | \n",
- " Xi'an Xianyang International Airport | \n",
- " XIY | \n",
- " Xi'an | \n",
- " CN | \n",
- " {'lat': '34.447102', 'lon': '108.751999'} | \n",
- " SE-BD | \n",
- " Clear | \n",
- " ... | \n",
- " 785.779071 | \n",
- " Licenciado Benito Juarez International Airport | \n",
- " AICM | \n",
- " Mexico City | \n",
- " MX | \n",
- " {'lat': '19.4363', 'lon': '-99.072098'} | \n",
- " MX-DIF | \n",
- " Damaging Wind | \n",
- " 0 | \n",
- " 2018-01-01 05:13:00 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 418.152089 | \n",
- " False | \n",
- " JetBeats | \n",
- " Genoa Cristoforo Colombo Airport | \n",
- " GE01 | \n",
- " Genova | \n",
- " IT | \n",
- " {'lat': '44.4133', 'lon': '8.8375'} | \n",
- " IT-42 | \n",
- " Thunder & Lightning | \n",
- " ... | \n",
- " 393.590441 | \n",
- " Edmonton International Airport | \n",
- " CYEG | \n",
- " Edmonton | \n",
- " CA | \n",
- " {'lat': '53.30970001', 'lon': '-113.5800018'} | \n",
- " CA-AB | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 01:43:03 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " 180.246816 | \n",
- " False | \n",
- " JetBeats | \n",
- " Zurich Airport | \n",
- " ZRH | \n",
- " Zurich | \n",
- " CH | \n",
- " {'lat': '47.464699', 'lon': '8.54917'} | \n",
- " CH-ZH | \n",
- " Hail | \n",
- " ... | \n",
- " 300.000000 | \n",
- " Zurich Airport | \n",
- " ZRH | \n",
- " Zurich | \n",
- " CH | \n",
- " {'lat': '47.464699', 'lon': '8.54917'} | \n",
- " CH-ZH | \n",
- " Clear | \n",
- " 0 | \n",
- " 2018-01-01 13:49:53 | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " 585.184310 | \n",
- " False | \n",
- " Kibana Airlines | \n",
- " Ottawa Macdonald-Cartier International Airport | \n",
- " YOW | \n",
- " Ottawa | \n",
- " CA | \n",
- " {'lat': '45.32249832', 'lon': '-75.66919708'} | \n",
- " CA-ON | \n",
- " Clear | \n",
- " ... | \n",
- " 614.942480 | \n",
- " Ciampino___G. B. Pastine International Airport | \n",
- " RM12 | \n",
- " Rome | \n",
- " IT | \n",
- " {'lat': '41.7994', 'lon': '12.5949'} | \n",
- " IT-62 | \n",
- " Thunder & Lightning | \n",
- " 0 | \n",
- " 2018-01-01 04:54:59 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " 960.869736 | \n",
- " True | \n",
- " Kibana Airlines | \n",
- " Rajiv Gandhi International Airport | \n",
- " HYD | \n",
- " Hyderabad | \n",
- " IN | \n",
- " {'lat': '17.23131752', 'lon': '78.42985535'} | \n",
- " SE-BD | \n",
- " Cloudy | \n",
- " ... | \n",
- " 602.030591 | \n",
- " Milano Linate Airport | \n",
- " MI11 | \n",
- " Milan | \n",
- " IT | \n",
- " {'lat': '45.445099', 'lon': '9.27674'} | \n",
- " IT-25 | \n",
- " Heavy Fog | \n",
- " 0 | \n",
- " 2018-01-01 12:09:35 | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " 296.877773 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Treviso-Sant'Angelo Airport | \n",
- " TV01 | \n",
- " Treviso | \n",
- " IT | \n",
- " {'lat': '45.648399', 'lon': '12.1944'} | \n",
- " IT-34 | \n",
- " Rain | \n",
- " ... | \n",
- " 174.822216 | \n",
- " Sheremetyevo International Airport | \n",
- " SVO | \n",
- " Moscow | \n",
- " RU | \n",
- " {'lat': '55.972599', 'lon': '37.4146'} | \n",
- " RU-MOS | \n",
- " Cloudy | \n",
- " 0 | \n",
- " 2018-01-01 12:09:35 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " 906.437948 | \n",
- " False | \n",
- " JetBeats | \n",
- " Helsinki Vantaa Airport | \n",
- " HEL | \n",
- " Helsinki | \n",
- " FI | \n",
- " {'lat': '60.31719971', 'lon': '24.9633007'} | \n",
- " FI-ES | \n",
- " Rain | \n",
- " ... | \n",
- " 503.045170 | \n",
- " Albuquerque International Sunport Airport | \n",
- " ABQ | \n",
- " Albuquerque | \n",
- " US | \n",
- " {'lat': '35.040199', 'lon': '-106.609001'} | \n",
- " US-NM | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 22:06:14 | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " 704.463771 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Vienna International Airport | \n",
- " VIE | \n",
- " Vienna | \n",
- " AT | \n",
- " {'lat': '48.11029816', 'lon': '16.56970024'} | \n",
- " AT-9 | \n",
- " Cloudy | \n",
- " ... | \n",
- " 36.075018 | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 11:52:34 | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " 922.499077 | \n",
- " True | \n",
- " Logstash Airways | \n",
- " Shanghai Pudong International Airport | \n",
- " PVG | \n",
- " Shanghai | \n",
- " CN | \n",
- " {'lat': '31.14340019', 'lon': '121.8050003'} | \n",
- " SE-BD | \n",
- " Clear | \n",
- " ... | \n",
- " 679.768391 | \n",
- " Licenciado Benito Juarez International Airport | \n",
- " AICM | \n",
- " Mexico City | \n",
- " MX | \n",
- " {'lat': '19.4363', 'lon': '-99.072098'} | \n",
- " MX-DIF | \n",
- " Heavy Fog | \n",
- " 0 | \n",
- " 2018-01-01 02:13:46 | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " 374.959276 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Ottawa Macdonald-Cartier International Airport | \n",
- " YOW | \n",
- " Ottawa | \n",
- " CA | \n",
- " {'lat': '45.32249832', 'lon': '-75.66919708'} | \n",
- " CA-ON | \n",
- " Rain | \n",
- " ... | \n",
- " 330.418282 | \n",
- " Naples International Airport | \n",
- " NA01 | \n",
- " Naples | \n",
- " IT | \n",
- " {'lat': '40.886002', 'lon': '14.2908'} | \n",
- " IT-72 | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 14:21:13 | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " 552.917371 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Luis Munoz Marin International Airport | \n",
- " SJU | \n",
- " San Juan | \n",
- " PR | \n",
- " {'lat': '18.43939972', 'lon': '-66.00180054'} | \n",
- " PR-U-A | \n",
- " Clear | \n",
- " ... | \n",
- " 407.145031 | \n",
- " Ciampino___G. B. Pastine International Airport | \n",
- " RM12 | \n",
- " Rome | \n",
- " IT | \n",
- " {'lat': '41.7994', 'lon': '12.5949'} | \n",
- " IT-62 | \n",
- " Cloudy | \n",
- " 0 | \n",
- " 2018-01-01 17:42:53 | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " 566.487557 | \n",
- " True | \n",
- " Kibana Airlines | \n",
- " Cologne Bonn Airport | \n",
- " CGN | \n",
- " Cologne | \n",
- " DE | \n",
- " {'lat': '50.86589813', 'lon': '7.142739773'} | \n",
- " DE-NW | \n",
- " Sunny | \n",
- " ... | \n",
- " 656.712658 | \n",
- " Chengdu Shuangliu International Airport | \n",
- " CTU | \n",
- " Chengdu | \n",
- " CN | \n",
- " {'lat': '30.57850075', 'lon': '103.9469986'} | \n",
- " SE-BD | \n",
- " Thunder & Lightning | \n",
- " 0 | \n",
- " 2018-01-01 19:55:32 | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " 989.952787 | \n",
- " True | \n",
- " Logstash Airways | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Damaging Wind | \n",
- " ... | \n",
- " 773.030334 | \n",
- " Licenciado Benito Juarez International Airport | \n",
- " AICM | \n",
- " Mexico City | \n",
- " MX | \n",
- " {'lat': '19.4363', 'lon': '-99.072098'} | \n",
- " MX-DIF | \n",
- " Thunder & Lightning | \n",
- " 0 | \n",
- " 2018-01-01 07:49:27 | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " 569.613255 | \n",
- " False | \n",
- " ES-Air | \n",
- " Ministro Pistarini International Airport | \n",
- " EZE | \n",
- " Buenos Aires | \n",
- " AR | \n",
- " {'lat': '-34.8222', 'lon': '-58.5358'} | \n",
- " SE-BD | \n",
- " Cloudy | \n",
- " ... | \n",
- " 704.716920 | \n",
- " Cleveland Hopkins International Airport | \n",
- " CLE | \n",
- " Cleveland | \n",
- " US | \n",
- " {'lat': '41.4117012', 'lon': '-81.84980011'} | \n",
- " US-OH | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 01:30:47 | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " 277.429707 | \n",
- " False | \n",
- " ES-Air | \n",
- " Shanghai Pudong International Airport | \n",
- " PVG | \n",
- " Shanghai | \n",
- " CN | \n",
- " {'lat': '31.14340019', 'lon': '121.8050003'} | \n",
- " SE-BD | \n",
- " Clear | \n",
- " ... | \n",
- " 355.957996 | \n",
- " Olenya Air Base | \n",
- " XLMO | \n",
- " Olenegorsk | \n",
- " RU | \n",
- " {'lat': '68.15180206', 'lon': '33.46390152'} | \n",
- " RU-MUR | \n",
- " Hail | \n",
- " 0 | \n",
- " 2018-01-01 07:58:17 | \n",
- "
\n",
- " \n",
- " 19 | \n",
- " 772.100846 | \n",
- " False | \n",
- " JetBeats | \n",
- " Indira Gandhi International Airport | \n",
- " DEL | \n",
- " New Delhi | \n",
- " IN | \n",
- " {'lat': '28.5665', 'lon': '77.103104'} | \n",
- " SE-BD | \n",
- " Clear | \n",
- " ... | \n",
- " 875.114675 | \n",
- " Casper-Natrona County International Airport | \n",
- " CPR | \n",
- " Casper | \n",
- " US | \n",
- " {'lat': '42.90800095', 'lon': '-106.4639969'} | \n",
- " US-WY | \n",
- " Cloudy | \n",
- " 0 | \n",
- " 2018-01-01 00:02:06 | \n",
- "
\n",
- " \n",
- " 20 | \n",
- " 167.599922 | \n",
- " False | \n",
- " JetBeats | \n",
- " Wichita Mid Continent Airport | \n",
- " ICT | \n",
- " Wichita | \n",
- " US | \n",
- " {'lat': '37.64989853', 'lon': '-97.43309784'} | \n",
- " US-KS | \n",
- " Clear | \n",
- " ... | \n",
- " 373.966883 | \n",
- " Erie International Tom Ridge Field | \n",
- " ERI | \n",
- " Erie | \n",
- " US | \n",
- " {'lat': '42.08312701', 'lon': '-80.17386675'} | \n",
- " US-PA | \n",
- " Cloudy | \n",
- " 0 | \n",
- " 2018-01-01 01:08:20 | \n",
- "
\n",
- " \n",
- " 21 | \n",
- " 253.210065 | \n",
- " False | \n",
- " ES-Air | \n",
- " Ottawa Macdonald-Cartier International Airport | \n",
- " YOW | \n",
- " Ottawa | \n",
- " CA | \n",
- " {'lat': '45.32249832', 'lon': '-75.66919708'} | \n",
- " CA-ON | \n",
- " Hail | \n",
- " ... | \n",
- " 130.667700 | \n",
- " Newark Liberty International Airport | \n",
- " EWR | \n",
- " Newark | \n",
- " US | \n",
- " {'lat': '40.69250107', 'lon': '-74.16870117'} | \n",
- " US-NJ | \n",
- " Clear | \n",
- " 0 | \n",
- " 2018-01-01 01:08:20 | \n",
- "
\n",
- " \n",
- " 22 | \n",
- " 917.247620 | \n",
- " False | \n",
- " JetBeats | \n",
- " Itami Airport | \n",
- " ITM | \n",
- " Osaka | \n",
- " JP | \n",
- " {'lat': '34.78549957', 'lon': '135.4380035'} | \n",
- " SE-BD | \n",
- " Damaging Wind | \n",
- " ... | \n",
- " 574.495310 | \n",
- " Copenhagen Kastrup Airport | \n",
- " CPH | \n",
- " Copenhagen | \n",
- " DK | \n",
- " {'lat': '55.61790085', 'lon': '12.65600014'} | \n",
- " DK-84 | \n",
- " Sunny | \n",
- " 0 | \n",
- " 2018-01-01 07:48:35 | \n",
- "
\n",
- " \n",
- " 23 | \n",
- " 451.591176 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Vienna International Airport | \n",
- " VIE | \n",
- " Vienna | \n",
- " AT | \n",
- " {'lat': '48.11029816', 'lon': '16.56970024'} | \n",
- " AT-9 | \n",
- " Heavy Fog | \n",
- " ... | \n",
- " 579.728943 | \n",
- " Seattle Tacoma International Airport | \n",
- " SEA | \n",
- " Seattle | \n",
- " US | \n",
- " {'lat': '47.44900131', 'lon': '-122.3089981'} | \n",
- " US-WA | \n",
- " Heavy Fog | \n",
- " 0 | \n",
- " 2018-01-01 18:57:21 | \n",
- "
\n",
- " \n",
- " 24 | \n",
- " 307.067201 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Charles de Gaulle International Airport | \n",
- " CDG | \n",
- " Paris | \n",
- " FR | \n",
- " {'lat': '49.01279831', 'lon': '2.549999952'} | \n",
- " FR-J | \n",
- " Clear | \n",
- " ... | \n",
- " 50.157229 | \n",
- " Berlin-Tegel Airport | \n",
- " TXL | \n",
- " Berlin | \n",
- " DE | \n",
- " {'lat': '52.5597', 'lon': '13.2877'} | \n",
- " DE-BE | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 13:18:25 | \n",
- "
\n",
- " \n",
- " 25 | \n",
- " 268.241596 | \n",
- " False | \n",
- " ES-Air | \n",
- " Narita International Airport | \n",
- " NRT | \n",
- " Tokyo | \n",
- " JP | \n",
- " {'lat': '35.76470184', 'lon': '140.3860016'} | \n",
- " SE-BD | \n",
- " Rain | \n",
- " ... | \n",
- " 527.567422 | \n",
- " Manchester Airport | \n",
- " MAN | \n",
- " Manchester | \n",
- " GB | \n",
- " {'lat': '53.35369873', 'lon': '-2.274950027'} | \n",
- " GB-ENG | \n",
- " Thunder & Lightning | \n",
- " 0 | \n",
- " 2018-01-01 08:20:35 | \n",
- "
\n",
- " \n",
- " 26 | \n",
- " 975.812632 | \n",
- " True | \n",
- " Kibana Airlines | \n",
- " Itami Airport | \n",
- " ITM | \n",
- " Osaka | \n",
- " JP | \n",
- " {'lat': '34.78549957', 'lon': '135.4380035'} | \n",
- " SE-BD | \n",
- " Hail | \n",
- " ... | \n",
- " 386.259764 | \n",
- " Helsinki Vantaa Airport | \n",
- " HEL | \n",
- " Helsinki | \n",
- " FI | \n",
- " {'lat': '60.31719971', 'lon': '24.9633007'} | \n",
- " FI-ES | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 15:38:32 | \n",
- "
\n",
- " \n",
- " 27 | \n",
- " 134.214546 | \n",
- " False | \n",
- " JetBeats | \n",
- " San Diego International Airport | \n",
- " SAN | \n",
- " San Diego | \n",
- " US | \n",
- " {'lat': '32.73360062', 'lon': '-117.1900024'} | \n",
- " US-CA | \n",
- " Clear | \n",
- " ... | \n",
- " 24.479650 | \n",
- " Phoenix Sky Harbor International Airport | \n",
- " PHX | \n",
- " Phoenix | \n",
- " US | \n",
- " {'lat': '33.43429947', 'lon': '-112.012001'} | \n",
- " US-AZ | \n",
- " Clear | \n",
- " 0 | \n",
- " 2018-01-01 03:08:45 | \n",
- "
\n",
- " \n",
- " 28 | \n",
- " 988.897564 | \n",
- " False | \n",
- " Kibana Airlines | \n",
- " Verona Villafranca Airport | \n",
- " VR10 | \n",
- " Verona | \n",
- " IT | \n",
- " {'lat': '45.395699', 'lon': '10.8885'} | \n",
- " IT-34 | \n",
- " Sunny | \n",
- " ... | \n",
- " 568.351033 | \n",
- " New Chitose Airport | \n",
- " CTS | \n",
- " Chitose / Tomakomai | \n",
- " JP | \n",
- " {'lat': '42.77519989', 'lon': '141.6920013'} | \n",
- " SE-BD | \n",
- " Damaging Wind | \n",
- " 0 | \n",
- " 2018-01-01 01:16:59 | \n",
- "
\n",
- " \n",
- " 29 | \n",
- " 511.067220 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Zurich Airport | \n",
- " ZRH | \n",
- " Zurich | \n",
- " CH | \n",
- " {'lat': '47.464699', 'lon': '8.54917'} | \n",
- " CH-ZH | \n",
- " Rain | \n",
- " ... | \n",
- " 425.889194 | \n",
- " Tulsa International Airport | \n",
- " TUL | \n",
- " Tulsa | \n",
- " US | \n",
- " {'lat': '36.19839859', 'lon': '-95.88809967'} | \n",
- " US-OK | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 18:00:59 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 13029 | \n",
- " 795.905278 | \n",
- " False | \n",
- " Kibana Airlines | \n",
- " Malpensa International Airport | \n",
- " MI12 | \n",
- " Milan | \n",
- " IT | \n",
- " {'lat': '45.6306', 'lon': '8.72811'} | \n",
- " IT-25 | \n",
- " Sunny | \n",
- " ... | \n",
- " 534.375826 | \n",
- " Itami Airport | \n",
- " ITM | \n",
- " Osaka | \n",
- " JP | \n",
- " {'lat': '34.78549957', 'lon': '135.4380035'} | \n",
- " SE-BD | \n",
- " Sunny | \n",
- " 6 | \n",
- " 2018-02-11 20:10:13 | \n",
- "
\n",
- " \n",
- " 13030 | \n",
- " 863.388068 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Xi'an Xianyang International Airport | \n",
- " XIY | \n",
- " Xi'an | \n",
- " CN | \n",
- " {'lat': '34.447102', 'lon': '108.751999'} | \n",
- " SE-BD | \n",
- " Damaging Wind | \n",
- " ... | \n",
- " 141.172633 | \n",
- " Tokyo Haneda International Airport | \n",
- " HND | \n",
- " Tokyo | \n",
- " JP | \n",
- " {'lat': '35.552299', 'lon': '139.779999'} | \n",
- " SE-BD | \n",
- " Clear | \n",
- " 6 | \n",
- " 2018-02-11 18:59:53 | \n",
- "
\n",
- " \n",
- " 13031 | \n",
- " 575.183008 | \n",
- " False | \n",
- " JetBeats | \n",
- " Savannah Hilton Head International Airport | \n",
- " SAV | \n",
- " Savannah | \n",
- " US | \n",
- " {'lat': '32.12760162', 'lon': '-81.20210266'} | \n",
- " US-GA | \n",
- " Thunder & Lightning | \n",
- " ... | \n",
- " 1113.137060 | \n",
- " OR Tambo International Airport | \n",
- " JNB | \n",
- " Johannesburg | \n",
- " ZA | \n",
- " {'lat': '-26.1392', 'lon': '28.246'} | \n",
- " SE-BD | \n",
- " Hail | \n",
- " 6 | \n",
- " 2018-02-11 00:57:48 | \n",
- "
\n",
- " \n",
- " 13032 | \n",
- " 817.368952 | \n",
- " False | \n",
- " JetBeats | \n",
- " Syracuse Hancock International Airport | \n",
- " SYR | \n",
- " Syracuse | \n",
- " US | \n",
- " {'lat': '43.11119843', 'lon': '-76.10630035'} | \n",
- " US-NY | \n",
- " Rain | \n",
- " ... | \n",
- " 714.964864 | \n",
- " El Dorado International Airport | \n",
- " BOG | \n",
- " Bogota | \n",
- " CO | \n",
- " {'lat': '4.70159', 'lon': '-74.1469'} | \n",
- " CO-CUN | \n",
- " Thunder & Lightning | \n",
- " 6 | \n",
- " 2018-02-11 12:02:49 | \n",
- "
\n",
- " \n",
- " 13033 | \n",
- " 579.582455 | \n",
- " False | \n",
- " ES-Air | \n",
- " Tampa International Airport | \n",
- " TPA | \n",
- " Tampa | \n",
- " US | \n",
- " {'lat': '27.97550011', 'lon': '-82.53320313'} | \n",
- " US-FL | \n",
- " Rain | \n",
- " ... | \n",
- " 234.929046 | \n",
- " Jorge Chavez International Airport | \n",
- " LIM | \n",
- " Lima | \n",
- " PE | \n",
- " {'lat': '-12.0219', 'lon': '-77.114304'} | \n",
- " SE-BD | \n",
- " Thunder & Lightning | \n",
- " 6 | \n",
- " 2018-02-11 02:07:40 | \n",
- "
\n",
- " \n",
- " 13034 | \n",
- " 1004.916638 | \n",
- " False | \n",
- " JetBeats | \n",
- " Olenya Air Base | \n",
- " XLMO | \n",
- " Olenegorsk | \n",
- " RU | \n",
- " {'lat': '68.15180206', 'lon': '33.46390152'} | \n",
- " RU-MUR | \n",
- " Clear | \n",
- " ... | \n",
- " 526.895776 | \n",
- " Gimpo International Airport | \n",
- " GMP | \n",
- " Seoul | \n",
- " KR | \n",
- " {'lat': '37.5583', 'lon': '126.791'} | \n",
- " SE-BD | \n",
- " Sunny | \n",
- " 6 | \n",
- " 2018-02-11 00:35:04 | \n",
- "
\n",
- " \n",
- " 13035 | \n",
- " 357.562842 | \n",
- " True | \n",
- " Logstash Airways | \n",
- " Shanghai Pudong International Airport | \n",
- " PVG | \n",
- " Shanghai | \n",
- " CN | \n",
- " {'lat': '31.14340019', 'lon': '121.8050003'} | \n",
- " SE-BD | \n",
- " Thunder & Lightning | \n",
- " ... | \n",
- " 0.000000 | \n",
- " Shanghai Pudong International Airport | \n",
- " PVG | \n",
- " Shanghai | \n",
- " CN | \n",
- " {'lat': '31.14340019', 'lon': '121.8050003'} | \n",
- " SE-BD | \n",
- " Thunder & Lightning | \n",
- " 6 | \n",
- " 2018-02-11 11:19:12 | \n",
- "
\n",
- " \n",
- " 13036 | \n",
- " 429.580539 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Sunny | \n",
- " ... | \n",
- " 150.000000 | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Cloudy | \n",
- " 6 | \n",
- " 2018-02-11 15:07:11 | \n",
- "
\n",
- " \n",
- " 13037 | \n",
- " 729.788171 | \n",
- " True | \n",
- " ES-Air | \n",
- " Vienna International Airport | \n",
- " VIE | \n",
- " Vienna | \n",
- " AT | \n",
- " {'lat': '48.11029816', 'lon': '16.56970024'} | \n",
- " AT-9 | \n",
- " Rain | \n",
- " ... | \n",
- " 691.944839 | \n",
- " Ukrainka Air Base | \n",
- " XHBU | \n",
- " Belogorsk | \n",
- " RU | \n",
- " {'lat': '51.169997', 'lon': '128.445007'} | \n",
- " RU-AMU | \n",
- " Damaging Wind | \n",
- " 6 | \n",
- " 2018-02-11 10:24:42 | \n",
- "
\n",
- " \n",
- " 13038 | \n",
- " 564.897695 | \n",
- " False | \n",
- " ES-Air | \n",
- " Pisa International Airport | \n",
- " PI05 | \n",
- " Pisa | \n",
- " IT | \n",
- " {'lat': '43.683899', 'lon': '10.3927'} | \n",
- " IT-52 | \n",
- " Heavy Fog | \n",
- " ... | \n",
- " 567.387339 | \n",
- " OR Tambo International Airport | \n",
- " JNB | \n",
- " Johannesburg | \n",
- " ZA | \n",
- " {'lat': '-26.1392', 'lon': '28.246'} | \n",
- " SE-BD | \n",
- " Damaging Wind | \n",
- " 6 | \n",
- " 2018-02-11 00:42:06 | \n",
- "
\n",
- " \n",
- " 13039 | \n",
- " 1014.052787 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Vienna International Airport | \n",
- " VIE | \n",
- " Vienna | \n",
- " AT | \n",
- " {'lat': '48.11029816', 'lon': '16.56970024'} | \n",
- " AT-9 | \n",
- " Thunder & Lightning | \n",
- " ... | \n",
- " 690.092327 | \n",
- " Montreal / Pierre Elliott Trudeau Internationa... | \n",
- " YUL | \n",
- " Montreal | \n",
- " CA | \n",
- " {'lat': '45.47060013', 'lon': '-73.74079895'} | \n",
- " CA-QC | \n",
- " Thunder & Lightning | \n",
- " 6 | \n",
- " 2018-02-11 10:56:31 | \n",
- "
\n",
- " \n",
- " 13040 | \n",
- " 455.243843 | \n",
- " False | \n",
- " ES-Air | \n",
- " London Luton Airport | \n",
- " LTN | \n",
- " London | \n",
- " GB | \n",
- " {'lat': '51.87469864', 'lon': '-0.368333012'} | \n",
- " GB-ENG | \n",
- " Cloudy | \n",
- " ... | \n",
- " 3.028293 | \n",
- " London Heathrow Airport | \n",
- " LHR | \n",
- " London | \n",
- " GB | \n",
- " {'lat': '51.4706', 'lon': '-0.461941'} | \n",
- " GB-ENG | \n",
- " Clear | \n",
- " 6 | \n",
- " 2018-02-11 00:39:37 | \n",
- "
\n",
- " \n",
- " 13041 | \n",
- " 611.370232 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Jorge Chavez International Airport | \n",
- " LIM | \n",
- " Lima | \n",
- " PE | \n",
- " {'lat': '-12.0219', 'lon': '-77.114304'} | \n",
- " SE-BD | \n",
- " Sunny | \n",
- " ... | \n",
- " 338.875531 | \n",
- " Casper-Natrona County International Airport | \n",
- " CPR | \n",
- " Casper | \n",
- " US | \n",
- " {'lat': '42.90800095', 'lon': '-106.4639969'} | \n",
- " US-WY | \n",
- " Rain | \n",
- " 6 | \n",
- " 2018-02-11 10:24:30 | \n",
- "
\n",
- " \n",
- " 13042 | \n",
- " 595.961285 | \n",
- " False | \n",
- " JetBeats | \n",
- " Ottawa Macdonald-Cartier International Airport | \n",
- " YOW | \n",
- " Ottawa | \n",
- " CA | \n",
- " {'lat': '45.32249832', 'lon': '-75.66919708'} | \n",
- " CA-ON | \n",
- " Clear | \n",
- " ... | \n",
- " 375.129587 | \n",
- " Frankfurt am Main Airport | \n",
- " FRA | \n",
- " Frankfurt am Main | \n",
- " DE | \n",
- " {'lat': '50.033333', 'lon': '8.570556'} | \n",
- " DE-HE | \n",
- " Clear | \n",
- " 6 | \n",
- " 2018-02-11 09:02:07 | \n",
- "
\n",
- " \n",
- " 13043 | \n",
- " 782.747648 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Xi'an Xianyang International Airport | \n",
- " XIY | \n",
- " Xi'an | \n",
- " CN | \n",
- " {'lat': '34.447102', 'lon': '108.751999'} | \n",
- " SE-BD | \n",
- " Clear | \n",
- " ... | \n",
- " 156.858481 | \n",
- " Tokyo Haneda International Airport | \n",
- " HND | \n",
- " Tokyo | \n",
- " JP | \n",
- " {'lat': '35.552299', 'lon': '139.779999'} | \n",
- " SE-BD | \n",
- " Thunder & Lightning | \n",
- " 6 | \n",
- " 2018-02-11 04:45:06 | \n",
- "
\n",
- " \n",
- " 13044 | \n",
- " 891.117221 | \n",
- " False | \n",
- " JetBeats | \n",
- " Winnipeg / James Armstrong Richardson Internat... | \n",
- " YWG | \n",
- " Winnipeg | \n",
- " CA | \n",
- " {'lat': '49.90999985', 'lon': '-97.23989868'} | \n",
- " CA-MB | \n",
- " Clear | \n",
- " ... | \n",
- " 354.106457 | \n",
- " Vienna International Airport | \n",
- " VIE | \n",
- " Vienna | \n",
- " AT | \n",
- " {'lat': '48.11029816', 'lon': '16.56970024'} | \n",
- " AT-9 | \n",
- " Thunder & Lightning | \n",
- " 6 | \n",
- " 2018-02-11 00:51:14 | \n",
- "
\n",
- " \n",
- " 13045 | \n",
- " 587.169921 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Brisbane International Airport | \n",
- " BNE | \n",
- " Brisbane | \n",
- " AU | \n",
- " {'lat': '-27.38419914', 'lon': '153.1170044'} | \n",
- " SE-BD | \n",
- " Rain | \n",
- " ... | \n",
- " 771.305442 | \n",
- " Amsterdam Airport Schiphol | \n",
- " AMS | \n",
- " Amsterdam | \n",
- " NL | \n",
- " {'lat': '52.30860138', 'lon': '4.76388979'} | \n",
- " NL-NH | \n",
- " Sunny | \n",
- " 6 | \n",
- " 2018-02-11 05:41:51 | \n",
- "
\n",
- " \n",
- " 13046 | \n",
- " 739.132165 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Xi'an Xianyang International Airport | \n",
- " XIY | \n",
- " Xi'an | \n",
- " CN | \n",
- " {'lat': '34.447102', 'lon': '108.751999'} | \n",
- " SE-BD | \n",
- " Rain | \n",
- " ... | \n",
- " 542.955572 | \n",
- " Winnipeg / James Armstrong Richardson Internat... | \n",
- " YWG | \n",
- " Winnipeg | \n",
- " CA | \n",
- " {'lat': '49.90999985', 'lon': '-97.23989868'} | \n",
- " CA-MB | \n",
- " Hail | \n",
- " 6 | \n",
- " 2018-02-11 10:02:21 | \n",
- "
\n",
- " \n",
- " 13047 | \n",
- " 605.191876 | \n",
- " False | \n",
- " JetBeats | \n",
- " Portland International Jetport Airport | \n",
- " PWM | \n",
- " Portland | \n",
- " US | \n",
- " {'lat': '43.64619827', 'lon': '-70.30930328'} | \n",
- " US-ME | \n",
- " Thunder & Lightning | \n",
- " ... | \n",
- " 564.599857 | \n",
- " Jeju International Airport | \n",
- " CJU | \n",
- " Jeju City | \n",
- " KR | \n",
- " {'lat': '33.51129913', 'lon': '126.4929962'} | \n",
- " SE-BD | \n",
- " Cloudy | \n",
- " 6 | \n",
- " 2018-02-11 15:55:10 | \n",
- "
\n",
- " \n",
- " 13048 | \n",
- " 361.767659 | \n",
- " True | \n",
- " Logstash Airways | \n",
- " Dubai International Airport | \n",
- " DXB | \n",
- " Dubai | \n",
- " AE | \n",
- " {'lat': '25.25279999', 'lon': '55.36439896'} | \n",
- " SE-BD | \n",
- " Sunny | \n",
- " ... | \n",
- " 180.000000 | \n",
- " Dubai International Airport | \n",
- " DXB | \n",
- " Dubai | \n",
- " AE | \n",
- " {'lat': '25.25279999', 'lon': '55.36439896'} | \n",
- " SE-BD | \n",
- " Hail | \n",
- " 6 | \n",
- " 2018-02-11 04:11:14 | \n",
- "
\n",
- " \n",
- " 13049 | \n",
- " 662.306992 | \n",
- " False | \n",
- " ES-Air | \n",
- " Winnipeg / James Armstrong Richardson Internat... | \n",
- " YWG | \n",
- " Winnipeg | \n",
- " CA | \n",
- " {'lat': '49.90999985', 'lon': '-97.23989868'} | \n",
- " CA-MB | \n",
- " Heavy Fog | \n",
- " ... | \n",
- " 835.954429 | \n",
- " Ministro Pistarini International Airport | \n",
- " EZE | \n",
- " Buenos Aires | \n",
- " AR | \n",
- " {'lat': '-34.8222', 'lon': '-58.5358'} | \n",
- " AR-B | \n",
- " Sunny | \n",
- " 6 | \n",
- " 2018-02-11 10:13:32 | \n",
- "
\n",
- " \n",
- " 13050 | \n",
- " 630.779526 | \n",
- " False | \n",
- " JetBeats | \n",
- " Helsinki Vantaa Airport | \n",
- " HEL | \n",
- " Helsinki | \n",
- " FI | \n",
- " {'lat': '60.31719971', 'lon': '24.9633007'} | \n",
- " FI-ES | \n",
- " Sunny | \n",
- " ... | \n",
- " 451.755639 | \n",
- " Beijing Capital International Airport | \n",
- " PEK | \n",
- " Beijing | \n",
- " CN | \n",
- " {'lat': '40.08010101', 'lon': '116.5849991'} | \n",
- " SE-BD | \n",
- " Cloudy | \n",
- " 6 | \n",
- " 2018-02-11 11:23:23 | \n",
- "
\n",
- " \n",
- " 13051 | \n",
- " 937.771279 | \n",
- " True | \n",
- " Logstash Airways | \n",
- " Lester B. Pearson International Airport | \n",
- " YYZ | \n",
- " Toronto | \n",
- " CA | \n",
- " {'lat': '43.67720032', 'lon': '-79.63059998'} | \n",
- " CA-ON | \n",
- " Sunny | \n",
- " ... | \n",
- " 507.451571 | \n",
- " Leonardo da Vinci___Fiumicino Airport | \n",
- " RM11 | \n",
- " Rome | \n",
- " IT | \n",
- " {'lat': '41.8002778', 'lon': '12.2388889'} | \n",
- " IT-62 | \n",
- " Hail | \n",
- " 6 | \n",
- " 2018-02-11 01:13:50 | \n",
- "
\n",
- " \n",
- " 13052 | \n",
- " 1085.155339 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Melbourne International Airport | \n",
- " MEL | \n",
- " Melbourne | \n",
- " AU | \n",
- " {'lat': '-37.673302', 'lon': '144.843002'} | \n",
- " SE-BD | \n",
- " Cloudy | \n",
- " ... | \n",
- " 1044.451122 | \n",
- " Bologna Guglielmo Marconi Airport | \n",
- " BO08 | \n",
- " Bologna | \n",
- " IT | \n",
- " {'lat': '44.5354', 'lon': '11.2887'} | \n",
- " IT-45 | \n",
- " Cloudy | \n",
- " 6 | \n",
- " 2018-02-11 18:35:42 | \n",
- "
\n",
- " \n",
- " 13053 | \n",
- " 1191.964104 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Zurich Airport | \n",
- " ZRH | \n",
- " Zurich | \n",
- " CH | \n",
- " {'lat': '47.464699', 'lon': '8.54917'} | \n",
- " CH-ZH | \n",
- " Hail | \n",
- " ... | \n",
- " 728.715904 | \n",
- " Portland International Jetport Airport | \n",
- " PWM | \n",
- " Portland | \n",
- " US | \n",
- " {'lat': '43.64619827', 'lon': '-70.30930328'} | \n",
- " US-ME | \n",
- " Clear | \n",
- " 6 | \n",
- " 2018-02-11 19:02:10 | \n",
- "
\n",
- " \n",
- " 13054 | \n",
- " 1080.446279 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Xi'an Xianyang International Airport | \n",
- " XIY | \n",
- " Xi'an | \n",
- " CN | \n",
- " {'lat': '34.447102', 'lon': '108.751999'} | \n",
- " SE-BD | \n",
- " Rain | \n",
- " ... | \n",
- " 402.929088 | \n",
- " Pisa International Airport | \n",
- " PI05 | \n",
- " Pisa | \n",
- " IT | \n",
- " {'lat': '43.683899', 'lon': '10.3927'} | \n",
- " IT-52 | \n",
- " Sunny | \n",
- " 6 | \n",
- " 2018-02-11 20:42:25 | \n",
- "
\n",
- " \n",
- " 13055 | \n",
- " 646.612941 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Zurich Airport | \n",
- " ZRH | \n",
- " Zurich | \n",
- " CH | \n",
- " {'lat': '47.464699', 'lon': '8.54917'} | \n",
- " CH-ZH | \n",
- " Rain | \n",
- " ... | \n",
- " 644.418029 | \n",
- " Winnipeg / James Armstrong Richardson Internat... | \n",
- " YWG | \n",
- " Winnipeg | \n",
- " CA | \n",
- " {'lat': '49.90999985', 'lon': '-97.23989868'} | \n",
- " CA-MB | \n",
- " Rain | \n",
- " 6 | \n",
- " 2018-02-11 01:41:57 | \n",
- "
\n",
- " \n",
- " 13056 | \n",
- " 997.751876 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Ukrainka Air Base | \n",
- " XHBU | \n",
- " Belogorsk | \n",
- " RU | \n",
- " {'lat': '51.169997', 'lon': '128.445007'} | \n",
- " RU-AMU | \n",
- " Rain | \n",
- " ... | \n",
- " 937.540811 | \n",
- " Licenciado Benito Juarez International Airport | \n",
- " AICM | \n",
- " Mexico City | \n",
- " MX | \n",
- " {'lat': '19.4363', 'lon': '-99.072098'} | \n",
- " MX-DIF | \n",
- " Sunny | \n",
- " 6 | \n",
- " 2018-02-11 04:09:27 | \n",
- "
\n",
- " \n",
- " 13057 | \n",
- " 1102.814465 | \n",
- " False | \n",
- " JetBeats | \n",
- " Ministro Pistarini International Airport | \n",
- " EZE | \n",
- " Buenos Aires | \n",
- " AR | \n",
- " {'lat': '-34.8222', 'lon': '-58.5358'} | \n",
- " SE-BD | \n",
- " Hail | \n",
- " ... | \n",
- " 1697.404971 | \n",
- " Itami Airport | \n",
- " ITM | \n",
- " Osaka | \n",
- " JP | \n",
- " {'lat': '34.78549957', 'lon': '135.4380035'} | \n",
- " SE-BD | \n",
- " Hail | \n",
- " 6 | \n",
- " 2018-02-11 08:28:21 | \n",
- "
\n",
- " \n",
- " 13058 | \n",
- " 858.144337 | \n",
- " False | \n",
- " JetBeats | \n",
- " Washington Dulles International Airport | \n",
- " IAD | \n",
- " Washington | \n",
- " US | \n",
- " {'lat': '38.94449997', 'lon': '-77.45580292'} | \n",
- " US-DC | \n",
- " Heavy Fog | \n",
- " ... | \n",
- " 1610.761827 | \n",
- " Adelaide International Airport | \n",
- " ADL | \n",
- " Adelaide | \n",
- " AU | \n",
- " {'lat': '-34.945', 'lon': '138.531006'} | \n",
- " SE-BD | \n",
- " Rain | \n",
- " 6 | \n",
- " 2018-02-11 14:54:34 | \n",
- "
\n",
- " \n",
- "
\n",
- "
13059 rows × 27 columns
\n",
- "
"
- ],
- "text/plain": [
- " AvgTicketPrice Cancelled Carrier \\\n",
- "0 841.265642 False Kibana Airlines \n",
- "1 882.982662 False Logstash Airways \n",
- "2 190.636904 False Logstash Airways \n",
- "3 181.694216 True Kibana Airlines \n",
- "4 730.041778 False Kibana Airlines \n",
- "5 418.152089 False JetBeats \n",
- "6 180.246816 False JetBeats \n",
- "7 585.184310 False Kibana Airlines \n",
- "8 960.869736 True Kibana Airlines \n",
- "9 296.877773 False Logstash Airways \n",
- "10 906.437948 False JetBeats \n",
- "11 704.463771 False Logstash Airways \n",
- "12 922.499077 True Logstash Airways \n",
- "13 374.959276 False Logstash Airways \n",
- "14 552.917371 False Logstash Airways \n",
- "15 566.487557 True Kibana Airlines \n",
- "16 989.952787 True Logstash Airways \n",
- "17 569.613255 False ES-Air \n",
- "18 277.429707 False ES-Air \n",
- "19 772.100846 False JetBeats \n",
- "20 167.599922 False JetBeats \n",
- "21 253.210065 False ES-Air \n",
- "22 917.247620 False JetBeats \n",
- "23 451.591176 False Logstash Airways \n",
- "24 307.067201 False Logstash Airways \n",
- "25 268.241596 False ES-Air \n",
- "26 975.812632 True Kibana Airlines \n",
- "27 134.214546 False JetBeats \n",
- "28 988.897564 False Kibana Airlines \n",
- "29 511.067220 False Logstash Airways \n",
- "... ... ... ... \n",
- "13029 795.905278 False Kibana Airlines \n",
- "13030 863.388068 False Logstash Airways \n",
- "13031 575.183008 False JetBeats \n",
- "13032 817.368952 False JetBeats \n",
- "13033 579.582455 False ES-Air \n",
- "13034 1004.916638 False JetBeats \n",
- "13035 357.562842 True Logstash Airways \n",
- "13036 429.580539 False Logstash Airways \n",
- "13037 729.788171 True ES-Air \n",
- "13038 564.897695 False ES-Air \n",
- "13039 1014.052787 False Logstash Airways \n",
- "13040 455.243843 False ES-Air \n",
- "13041 611.370232 False Logstash Airways \n",
- "13042 595.961285 False JetBeats \n",
- "13043 782.747648 False Logstash Airways \n",
- "13044 891.117221 False JetBeats \n",
- "13045 587.169921 False Logstash Airways \n",
- "13046 739.132165 False Logstash Airways \n",
- "13047 605.191876 False JetBeats \n",
- "13048 361.767659 True Logstash Airways \n",
- "13049 662.306992 False ES-Air \n",
- "13050 630.779526 False JetBeats \n",
- "13051 937.771279 True Logstash Airways \n",
- "13052 1085.155339 False Logstash Airways \n",
- "13053 1191.964104 False Logstash Airways \n",
- "13054 1080.446279 False Logstash Airways \n",
- "13055 646.612941 False Logstash Airways \n",
- "13056 997.751876 False Logstash Airways \n",
- "13057 1102.814465 False JetBeats \n",
- "13058 858.144337 False JetBeats \n",
- "\n",
- " Dest DestAirportID \\\n",
- "0 Sydney Kingsford Smith International Airport SYD \n",
- "1 Venice Marco Polo Airport VE05 \n",
- "2 Venice Marco Polo Airport VE05 \n",
- "3 Treviso-Sant'Angelo Airport TV01 \n",
- "4 Xi'an Xianyang International Airport XIY \n",
- "5 Genoa Cristoforo Colombo Airport GE01 \n",
- "6 Zurich Airport ZRH \n",
- "7 Ottawa Macdonald-Cartier International Airport YOW \n",
- "8 Rajiv Gandhi International Airport HYD \n",
- "9 Treviso-Sant'Angelo Airport TV01 \n",
- "10 Helsinki Vantaa Airport HEL \n",
- "11 Vienna International Airport VIE \n",
- "12 Shanghai Pudong International Airport PVG \n",
- "13 Ottawa Macdonald-Cartier International Airport YOW \n",
- "14 Luis Munoz Marin International Airport SJU \n",
- "15 Cologne Bonn Airport CGN \n",
- "16 Venice Marco Polo Airport VE05 \n",
- "17 Ministro Pistarini International Airport EZE \n",
- "18 Shanghai Pudong International Airport PVG \n",
- "19 Indira Gandhi International Airport DEL \n",
- "20 Wichita Mid Continent Airport ICT \n",
- "21 Ottawa Macdonald-Cartier International Airport YOW \n",
- "22 Itami Airport ITM \n",
- "23 Vienna International Airport VIE \n",
- "24 Charles de Gaulle International Airport CDG \n",
- "25 Narita International Airport NRT \n",
- "26 Itami Airport ITM \n",
- "27 San Diego International Airport SAN \n",
- "28 Verona Villafranca Airport VR10 \n",
- "29 Zurich Airport ZRH \n",
- "... ... ... \n",
- "13029 Malpensa International Airport MI12 \n",
- "13030 Xi'an Xianyang International Airport XIY \n",
- "13031 Savannah Hilton Head International Airport SAV \n",
- "13032 Syracuse Hancock International Airport SYR \n",
- "13033 Tampa International Airport TPA \n",
- "13034 Olenya Air Base XLMO \n",
- "13035 Shanghai Pudong International Airport PVG \n",
- "13036 Venice Marco Polo Airport VE05 \n",
- "13037 Vienna International Airport VIE \n",
- "13038 Pisa International Airport PI05 \n",
- "13039 Vienna International Airport VIE \n",
- "13040 London Luton Airport LTN \n",
- "13041 Jorge Chavez International Airport LIM \n",
- "13042 Ottawa Macdonald-Cartier International Airport YOW \n",
- "13043 Xi'an Xianyang International Airport XIY \n",
- "13044 Winnipeg / James Armstrong Richardson Internat... YWG \n",
- "13045 Brisbane International Airport BNE \n",
- "13046 Xi'an Xianyang International Airport XIY \n",
- "13047 Portland International Jetport Airport PWM \n",
- "13048 Dubai International Airport DXB \n",
- "13049 Winnipeg / James Armstrong Richardson Internat... YWG \n",
- "13050 Helsinki Vantaa Airport HEL \n",
- "13051 Lester B. Pearson International Airport YYZ \n",
- "13052 Melbourne International Airport MEL \n",
- "13053 Zurich Airport ZRH \n",
- "13054 Xi'an Xianyang International Airport XIY \n",
- "13055 Zurich Airport ZRH \n",
- "13056 Ukrainka Air Base XHBU \n",
- "13057 Ministro Pistarini International Airport EZE \n",
- "13058 Washington Dulles International Airport IAD \n",
- "\n",
- " DestCityName DestCountry \\\n",
- "0 Sydney AU \n",
- "1 Venice IT \n",
- "2 Venice IT \n",
- "3 Treviso IT \n",
- "4 Xi'an CN \n",
- "5 Genova IT \n",
- "6 Zurich CH \n",
- "7 Ottawa CA \n",
- "8 Hyderabad IN \n",
- "9 Treviso IT \n",
- "10 Helsinki FI \n",
- "11 Vienna AT \n",
- "12 Shanghai CN \n",
- "13 Ottawa CA \n",
- "14 San Juan PR \n",
- "15 Cologne DE \n",
- "16 Venice IT \n",
- "17 Buenos Aires AR \n",
- "18 Shanghai CN \n",
- "19 New Delhi IN \n",
- "20 Wichita US \n",
- "21 Ottawa CA \n",
- "22 Osaka JP \n",
- "23 Vienna AT \n",
- "24 Paris FR \n",
- "25 Tokyo JP \n",
- "26 Osaka JP \n",
- "27 San Diego US \n",
- "28 Verona IT \n",
- "29 Zurich CH \n",
- "... ... ... \n",
- "13029 Milan IT \n",
- "13030 Xi'an CN \n",
- "13031 Savannah US \n",
- "13032 Syracuse US \n",
- "13033 Tampa US \n",
- "13034 Olenegorsk RU \n",
- "13035 Shanghai CN \n",
- "13036 Venice IT \n",
- "13037 Vienna AT \n",
- "13038 Pisa IT \n",
- "13039 Vienna AT \n",
- "13040 London GB \n",
- "13041 Lima PE \n",
- "13042 Ottawa CA \n",
- "13043 Xi'an CN \n",
- "13044 Winnipeg CA \n",
- "13045 Brisbane AU \n",
- "13046 Xi'an CN \n",
- "13047 Portland US \n",
- "13048 Dubai AE \n",
- "13049 Winnipeg CA \n",
- "13050 Helsinki FI \n",
- "13051 Toronto CA \n",
- "13052 Melbourne AU \n",
- "13053 Zurich CH \n",
- "13054 Xi'an CN \n",
- "13055 Zurich CH \n",
- "13056 Belogorsk RU \n",
- "13057 Buenos Aires AR \n",
- "13058 Washington US \n",
- "\n",
- " DestLocation DestRegion \\\n",
- "0 {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n",
- "1 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
- "2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
- "3 {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n",
- "4 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
- "5 {'lat': '44.4133', 'lon': '8.8375'} IT-42 \n",
- "6 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n",
- "7 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n",
- "8 {'lat': '17.23131752', 'lon': '78.42985535'} SE-BD \n",
- "9 {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n",
- "10 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n",
- "11 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n",
- "12 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n",
- "13 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n",
- "14 {'lat': '18.43939972', 'lon': '-66.00180054'} PR-U-A \n",
- "15 {'lat': '50.86589813', 'lon': '7.142739773'} DE-NW \n",
- "16 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
- "17 {'lat': '-34.8222', 'lon': '-58.5358'} SE-BD \n",
- "18 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n",
- "19 {'lat': '28.5665', 'lon': '77.103104'} SE-BD \n",
- "20 {'lat': '37.64989853', 'lon': '-97.43309784'} US-KS \n",
- "21 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n",
- "22 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n",
- "23 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n",
- "24 {'lat': '49.01279831', 'lon': '2.549999952'} FR-J \n",
- "25 {'lat': '35.76470184', 'lon': '140.3860016'} SE-BD \n",
- "26 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n",
- "27 {'lat': '32.73360062', 'lon': '-117.1900024'} US-CA \n",
- "28 {'lat': '45.395699', 'lon': '10.8885'} IT-34 \n",
- "29 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n",
- "... ... ... \n",
- "13029 {'lat': '45.6306', 'lon': '8.72811'} IT-25 \n",
- "13030 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
- "13031 {'lat': '32.12760162', 'lon': '-81.20210266'} US-GA \n",
- "13032 {'lat': '43.11119843', 'lon': '-76.10630035'} US-NY \n",
- "13033 {'lat': '27.97550011', 'lon': '-82.53320313'} US-FL \n",
- "13034 {'lat': '68.15180206', 'lon': '33.46390152'} RU-MUR \n",
- "13035 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n",
- "13036 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
- "13037 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n",
- "13038 {'lat': '43.683899', 'lon': '10.3927'} IT-52 \n",
- "13039 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n",
- "13040 {'lat': '51.87469864', 'lon': '-0.368333012'} GB-ENG \n",
- "13041 {'lat': '-12.0219', 'lon': '-77.114304'} SE-BD \n",
- "13042 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n",
- "13043 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
- "13044 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n",
- "13045 {'lat': '-27.38419914', 'lon': '153.1170044'} SE-BD \n",
- "13046 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
- "13047 {'lat': '43.64619827', 'lon': '-70.30930328'} US-ME \n",
- "13048 {'lat': '25.25279999', 'lon': '55.36439896'} SE-BD \n",
- "13049 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n",
- "13050 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n",
- "13051 {'lat': '43.67720032', 'lon': '-79.63059998'} CA-ON \n",
- "13052 {'lat': '-37.673302', 'lon': '144.843002'} SE-BD \n",
- "13053 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n",
- "13054 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
- "13055 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n",
- "13056 {'lat': '51.169997', 'lon': '128.445007'} RU-AMU \n",
- "13057 {'lat': '-34.8222', 'lon': '-58.5358'} SE-BD \n",
- "13058 {'lat': '38.94449997', 'lon': '-77.45580292'} US-DC \n",
- "\n",
- " DestWeather ... FlightTimeMin \\\n",
- "0 Rain ... 1030.770416 \n",
- "1 Sunny ... 464.389481 \n",
- "2 Cloudy ... 0.000000 \n",
- "3 Clear ... 222.749059 \n",
- "4 Clear ... 785.779071 \n",
- "5 Thunder & Lightning ... 393.590441 \n",
- "6 Hail ... 300.000000 \n",
- "7 Clear ... 614.942480 \n",
- "8 Cloudy ... 602.030591 \n",
- "9 Rain ... 174.822216 \n",
- "10 Rain ... 503.045170 \n",
- "11 Cloudy ... 36.075018 \n",
- "12 Clear ... 679.768391 \n",
- "13 Rain ... 330.418282 \n",
- "14 Clear ... 407.145031 \n",
- "15 Sunny ... 656.712658 \n",
- "16 Damaging Wind ... 773.030334 \n",
- "17 Cloudy ... 704.716920 \n",
- "18 Clear ... 355.957996 \n",
- "19 Clear ... 875.114675 \n",
- "20 Clear ... 373.966883 \n",
- "21 Hail ... 130.667700 \n",
- "22 Damaging Wind ... 574.495310 \n",
- "23 Heavy Fog ... 579.728943 \n",
- "24 Clear ... 50.157229 \n",
- "25 Rain ... 527.567422 \n",
- "26 Hail ... 386.259764 \n",
- "27 Clear ... 24.479650 \n",
- "28 Sunny ... 568.351033 \n",
- "29 Rain ... 425.889194 \n",
- "... ... ... ... \n",
- "13029 Sunny ... 534.375826 \n",
- "13030 Damaging Wind ... 141.172633 \n",
- "13031 Thunder & Lightning ... 1113.137060 \n",
- "13032 Rain ... 714.964864 \n",
- "13033 Rain ... 234.929046 \n",
- "13034 Clear ... 526.895776 \n",
- "13035 Thunder & Lightning ... 0.000000 \n",
- "13036 Sunny ... 150.000000 \n",
- "13037 Rain ... 691.944839 \n",
- "13038 Heavy Fog ... 567.387339 \n",
- "13039 Thunder & Lightning ... 690.092327 \n",
- "13040 Cloudy ... 3.028293 \n",
- "13041 Sunny ... 338.875531 \n",
- "13042 Clear ... 375.129587 \n",
- "13043 Clear ... 156.858481 \n",
- "13044 Clear ... 354.106457 \n",
- "13045 Rain ... 771.305442 \n",
- "13046 Rain ... 542.955572 \n",
- "13047 Thunder & Lightning ... 564.599857 \n",
- "13048 Sunny ... 180.000000 \n",
- "13049 Heavy Fog ... 835.954429 \n",
- "13050 Sunny ... 451.755639 \n",
- "13051 Sunny ... 507.451571 \n",
- "13052 Cloudy ... 1044.451122 \n",
- "13053 Hail ... 728.715904 \n",
- "13054 Rain ... 402.929088 \n",
- "13055 Rain ... 644.418029 \n",
- "13056 Rain ... 937.540811 \n",
- "13057 Hail ... 1697.404971 \n",
- "13058 Heavy Fog ... 1610.761827 \n",
- "\n",
- " Origin OriginAirportID \\\n",
- "0 Frankfurt am Main Airport FRA \n",
- "1 Cape Town International Airport CPT \n",
- "2 Venice Marco Polo Airport VE05 \n",
- "3 Naples International Airport NA01 \n",
- "4 Licenciado Benito Juarez International Airport AICM \n",
- "5 Edmonton International Airport CYEG \n",
- "6 Zurich Airport ZRH \n",
- "7 Ciampino___G. B. Pastine International Airport RM12 \n",
- "8 Milano Linate Airport MI11 \n",
- "9 Sheremetyevo International Airport SVO \n",
- "10 Albuquerque International Sunport Airport ABQ \n",
- "11 Venice Marco Polo Airport VE05 \n",
- "12 Licenciado Benito Juarez International Airport AICM \n",
- "13 Naples International Airport NA01 \n",
- "14 Ciampino___G. B. Pastine International Airport RM12 \n",
- "15 Chengdu Shuangliu International Airport CTU \n",
- "16 Licenciado Benito Juarez International Airport AICM \n",
- "17 Cleveland Hopkins International Airport CLE \n",
- "18 Olenya Air Base XLMO \n",
- "19 Casper-Natrona County International Airport CPR \n",
- "20 Erie International Tom Ridge Field ERI \n",
- "21 Newark Liberty International Airport EWR \n",
- "22 Copenhagen Kastrup Airport CPH \n",
- "23 Seattle Tacoma International Airport SEA \n",
- "24 Berlin-Tegel Airport TXL \n",
- "25 Manchester Airport MAN \n",
- "26 Helsinki Vantaa Airport HEL \n",
- "27 Phoenix Sky Harbor International Airport PHX \n",
- "28 New Chitose Airport CTS \n",
- "29 Tulsa International Airport TUL \n",
- "... ... ... \n",
- "13029 Itami Airport ITM \n",
- "13030 Tokyo Haneda International Airport HND \n",
- "13031 OR Tambo International Airport JNB \n",
- "13032 El Dorado International Airport BOG \n",
- "13033 Jorge Chavez International Airport LIM \n",
- "13034 Gimpo International Airport GMP \n",
- "13035 Shanghai Pudong International Airport PVG \n",
- "13036 Venice Marco Polo Airport VE05 \n",
- "13037 Ukrainka Air Base XHBU \n",
- "13038 OR Tambo International Airport JNB \n",
- "13039 Montreal / Pierre Elliott Trudeau Internationa... YUL \n",
- "13040 London Heathrow Airport LHR \n",
- "13041 Casper-Natrona County International Airport CPR \n",
- "13042 Frankfurt am Main Airport FRA \n",
- "13043 Tokyo Haneda International Airport HND \n",
- "13044 Vienna International Airport VIE \n",
- "13045 Amsterdam Airport Schiphol AMS \n",
- "13046 Winnipeg / James Armstrong Richardson Internat... YWG \n",
- "13047 Jeju International Airport CJU \n",
- "13048 Dubai International Airport DXB \n",
- "13049 Ministro Pistarini International Airport EZE \n",
- "13050 Beijing Capital International Airport PEK \n",
- "13051 Leonardo da Vinci___Fiumicino Airport RM11 \n",
- "13052 Bologna Guglielmo Marconi Airport BO08 \n",
- "13053 Portland International Jetport Airport PWM \n",
- "13054 Pisa International Airport PI05 \n",
- "13055 Winnipeg / James Armstrong Richardson Internat... YWG \n",
- "13056 Licenciado Benito Juarez International Airport AICM \n",
- "13057 Itami Airport ITM \n",
- "13058 Adelaide International Airport ADL \n",
- "\n",
- " OriginCityName OriginCountry \\\n",
- "0 Frankfurt am Main DE \n",
- "1 Cape Town ZA \n",
- "2 Venice IT \n",
- "3 Naples IT \n",
- "4 Mexico City MX \n",
- "5 Edmonton CA \n",
- "6 Zurich CH \n",
- "7 Rome IT \n",
- "8 Milan IT \n",
- "9 Moscow RU \n",
- "10 Albuquerque US \n",
- "11 Venice IT \n",
- "12 Mexico City MX \n",
- "13 Naples IT \n",
- "14 Rome IT \n",
- "15 Chengdu CN \n",
- "16 Mexico City MX \n",
- "17 Cleveland US \n",
- "18 Olenegorsk RU \n",
- "19 Casper US \n",
- "20 Erie US \n",
- "21 Newark US \n",
- "22 Copenhagen DK \n",
- "23 Seattle US \n",
- "24 Berlin DE \n",
- "25 Manchester GB \n",
- "26 Helsinki FI \n",
- "27 Phoenix US \n",
- "28 Chitose / Tomakomai JP \n",
- "29 Tulsa US \n",
- "... ... ... \n",
- "13029 Osaka JP \n",
- "13030 Tokyo JP \n",
- "13031 Johannesburg ZA \n",
- "13032 Bogota CO \n",
- "13033 Lima PE \n",
- "13034 Seoul KR \n",
- "13035 Shanghai CN \n",
- "13036 Venice IT \n",
- "13037 Belogorsk RU \n",
- "13038 Johannesburg ZA \n",
- "13039 Montreal CA \n",
- "13040 London GB \n",
- "13041 Casper US \n",
- "13042 Frankfurt am Main DE \n",
- "13043 Tokyo JP \n",
- "13044 Vienna AT \n",
- "13045 Amsterdam NL \n",
- "13046 Winnipeg CA \n",
- "13047 Jeju City KR \n",
- "13048 Dubai AE \n",
- "13049 Buenos Aires AR \n",
- "13050 Beijing CN \n",
- "13051 Rome IT \n",
- "13052 Bologna IT \n",
- "13053 Portland US \n",
- "13054 Pisa IT \n",
- "13055 Winnipeg CA \n",
- "13056 Mexico City MX \n",
- "13057 Osaka JP \n",
- "13058 Adelaide AU \n",
- "\n",
- " OriginLocation OriginRegion \\\n",
- "0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n",
- "1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n",
- "2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
- "3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n",
- "4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
- "5 {'lat': '53.30970001', 'lon': '-113.5800018'} CA-AB \n",
- "6 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n",
- "7 {'lat': '41.7994', 'lon': '12.5949'} IT-62 \n",
- "8 {'lat': '45.445099', 'lon': '9.27674'} IT-25 \n",
- "9 {'lat': '55.972599', 'lon': '37.4146'} RU-MOS \n",
- "10 {'lat': '35.040199', 'lon': '-106.609001'} US-NM \n",
- "11 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
- "12 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
- "13 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n",
- "14 {'lat': '41.7994', 'lon': '12.5949'} IT-62 \n",
- "15 {'lat': '30.57850075', 'lon': '103.9469986'} SE-BD \n",
- "16 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
- "17 {'lat': '41.4117012', 'lon': '-81.84980011'} US-OH \n",
- "18 {'lat': '68.15180206', 'lon': '33.46390152'} RU-MUR \n",
- "19 {'lat': '42.90800095', 'lon': '-106.4639969'} US-WY \n",
- "20 {'lat': '42.08312701', 'lon': '-80.17386675'} US-PA \n",
- "21 {'lat': '40.69250107', 'lon': '-74.16870117'} US-NJ \n",
- "22 {'lat': '55.61790085', 'lon': '12.65600014'} DK-84 \n",
- "23 {'lat': '47.44900131', 'lon': '-122.3089981'} US-WA \n",
- "24 {'lat': '52.5597', 'lon': '13.2877'} DE-BE \n",
- "25 {'lat': '53.35369873', 'lon': '-2.274950027'} GB-ENG \n",
- "26 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n",
- "27 {'lat': '33.43429947', 'lon': '-112.012001'} US-AZ \n",
- "28 {'lat': '42.77519989', 'lon': '141.6920013'} SE-BD \n",
- "29 {'lat': '36.19839859', 'lon': '-95.88809967'} US-OK \n",
- "... ... ... \n",
- "13029 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n",
- "13030 {'lat': '35.552299', 'lon': '139.779999'} SE-BD \n",
- "13031 {'lat': '-26.1392', 'lon': '28.246'} SE-BD \n",
- "13032 {'lat': '4.70159', 'lon': '-74.1469'} CO-CUN \n",
- "13033 {'lat': '-12.0219', 'lon': '-77.114304'} SE-BD \n",
- "13034 {'lat': '37.5583', 'lon': '126.791'} SE-BD \n",
- "13035 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n",
- "13036 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
- "13037 {'lat': '51.169997', 'lon': '128.445007'} RU-AMU \n",
- "13038 {'lat': '-26.1392', 'lon': '28.246'} SE-BD \n",
- "13039 {'lat': '45.47060013', 'lon': '-73.74079895'} CA-QC \n",
- "13040 {'lat': '51.4706', 'lon': '-0.461941'} GB-ENG \n",
- "13041 {'lat': '42.90800095', 'lon': '-106.4639969'} US-WY \n",
- "13042 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n",
- "13043 {'lat': '35.552299', 'lon': '139.779999'} SE-BD \n",
- "13044 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n",
- "13045 {'lat': '52.30860138', 'lon': '4.76388979'} NL-NH \n",
- "13046 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n",
- "13047 {'lat': '33.51129913', 'lon': '126.4929962'} SE-BD \n",
- "13048 {'lat': '25.25279999', 'lon': '55.36439896'} SE-BD \n",
- "13049 {'lat': '-34.8222', 'lon': '-58.5358'} AR-B \n",
- "13050 {'lat': '40.08010101', 'lon': '116.5849991'} SE-BD \n",
- "13051 {'lat': '41.8002778', 'lon': '12.2388889'} IT-62 \n",
- "13052 {'lat': '44.5354', 'lon': '11.2887'} IT-45 \n",
- "13053 {'lat': '43.64619827', 'lon': '-70.30930328'} US-ME \n",
- "13054 {'lat': '43.683899', 'lon': '10.3927'} IT-52 \n",
- "13055 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n",
- "13056 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
- "13057 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n",
- "13058 {'lat': '-34.945', 'lon': '138.531006'} SE-BD \n",
- "\n",
- " OriginWeather dayOfWeek timestamp \n",
- "0 Sunny 0 2018-01-01 00:00:00 \n",
- "1 Clear 0 2018-01-01 18:27:00 \n",
- "2 Rain 0 2018-01-01 17:11:14 \n",
- "3 Thunder & Lightning 0 2018-01-01 10:33:28 \n",
- "4 Damaging Wind 0 2018-01-01 05:13:00 \n",
- "5 Rain 0 2018-01-01 01:43:03 \n",
- "6 Clear 0 2018-01-01 13:49:53 \n",
- "7 Thunder & Lightning 0 2018-01-01 04:54:59 \n",
- "8 Heavy Fog 0 2018-01-01 12:09:35 \n",
- "9 Cloudy 0 2018-01-01 12:09:35 \n",
- "10 Rain 0 2018-01-01 22:06:14 \n",
- "11 Rain 0 2018-01-01 11:52:34 \n",
- "12 Heavy Fog 0 2018-01-01 02:13:46 \n",
- "13 Rain 0 2018-01-01 14:21:13 \n",
- "14 Cloudy 0 2018-01-01 17:42:53 \n",
- "15 Thunder & Lightning 0 2018-01-01 19:55:32 \n",
- "16 Thunder & Lightning 0 2018-01-01 07:49:27 \n",
- "17 Rain 0 2018-01-01 01:30:47 \n",
- "18 Hail 0 2018-01-01 07:58:17 \n",
- "19 Cloudy 0 2018-01-01 00:02:06 \n",
- "20 Cloudy 0 2018-01-01 01:08:20 \n",
- "21 Clear 0 2018-01-01 01:08:20 \n",
- "22 Sunny 0 2018-01-01 07:48:35 \n",
- "23 Heavy Fog 0 2018-01-01 18:57:21 \n",
- "24 Rain 0 2018-01-01 13:18:25 \n",
- "25 Thunder & Lightning 0 2018-01-01 08:20:35 \n",
- "26 Rain 0 2018-01-01 15:38:32 \n",
- "27 Clear 0 2018-01-01 03:08:45 \n",
- "28 Damaging Wind 0 2018-01-01 01:16:59 \n",
- "29 Rain 0 2018-01-01 18:00:59 \n",
- "... ... ... ... \n",
- "13029 Sunny 6 2018-02-11 20:10:13 \n",
- "13030 Clear 6 2018-02-11 18:59:53 \n",
- "13031 Hail 6 2018-02-11 00:57:48 \n",
- "13032 Thunder & Lightning 6 2018-02-11 12:02:49 \n",
- "13033 Thunder & Lightning 6 2018-02-11 02:07:40 \n",
- "13034 Sunny 6 2018-02-11 00:35:04 \n",
- "13035 Thunder & Lightning 6 2018-02-11 11:19:12 \n",
- "13036 Cloudy 6 2018-02-11 15:07:11 \n",
- "13037 Damaging Wind 6 2018-02-11 10:24:42 \n",
- "13038 Damaging Wind 6 2018-02-11 00:42:06 \n",
- "13039 Thunder & Lightning 6 2018-02-11 10:56:31 \n",
- "13040 Clear 6 2018-02-11 00:39:37 \n",
- "13041 Rain 6 2018-02-11 10:24:30 \n",
- "13042 Clear 6 2018-02-11 09:02:07 \n",
- "13043 Thunder & Lightning 6 2018-02-11 04:45:06 \n",
- "13044 Thunder & Lightning 6 2018-02-11 00:51:14 \n",
- "13045 Sunny 6 2018-02-11 05:41:51 \n",
- "13046 Hail 6 2018-02-11 10:02:21 \n",
- "13047 Cloudy 6 2018-02-11 15:55:10 \n",
- "13048 Hail 6 2018-02-11 04:11:14 \n",
- "13049 Sunny 6 2018-02-11 10:13:32 \n",
- "13050 Cloudy 6 2018-02-11 11:23:23 \n",
- "13051 Hail 6 2018-02-11 01:13:50 \n",
- "13052 Cloudy 6 2018-02-11 18:35:42 \n",
- "13053 Clear 6 2018-02-11 19:02:10 \n",
- "13054 Sunny 6 2018-02-11 20:42:25 \n",
- "13055 Rain 6 2018-02-11 01:41:57 \n",
- "13056 Sunny 6 2018-02-11 04:09:27 \n",
- "13057 Hail 6 2018-02-11 08:28:21 \n",
- "13058 Rain 6 2018-02-11 14:54:34 \n",
- "\n",
- "[13059 rows x 27 columns]"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "pd_df"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Eland"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [],
- "source": [
- "import eland as ed"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "ed_df = ed.read_es('localhost', 'flights')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {
- "scrolled": true
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " AvgTicketPrice | \n",
- " Cancelled | \n",
- " Carrier | \n",
- " Dest | \n",
- " DestAirportID | \n",
- " DestCityName | \n",
- " DestCountry | \n",
- " DestLocation | \n",
- " DestRegion | \n",
- " DestWeather | \n",
- " ... | \n",
- " FlightTimeMin | \n",
- " Origin | \n",
- " OriginAirportID | \n",
- " OriginCityName | \n",
- " OriginCountry | \n",
- " OriginLocation | \n",
- " OriginRegion | \n",
- " OriginWeather | \n",
- " dayOfWeek | \n",
- " timestamp | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " PNr3fWsBVUchcQJXWbLE | \n",
- " 841.265642 | \n",
- " False | \n",
- " Kibana Airlines | \n",
- " Sydney Kingsford Smith International Airport | \n",
- " SYD | \n",
- " Sydney | \n",
- " AU | \n",
- " {'lat': '-33.94609833', 'lon': '151.177002'} | \n",
- " SE-BD | \n",
- " Rain | \n",
- " ... | \n",
- " 1030.770416 | \n",
- " Frankfurt am Main Airport | \n",
- " FRA | \n",
- " Frankfurt am Main | \n",
- " DE | \n",
- " {'lat': '50.033333', 'lon': '8.570556'} | \n",
- " DE-HE | \n",
- " Sunny | \n",
- " 0 | \n",
- " 2018-01-01 00:00:00 | \n",
- "
\n",
- " \n",
- " Pdr3fWsBVUchcQJXWbLE | \n",
- " 882.982662 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Sunny | \n",
- " ... | \n",
- " 464.389481 | \n",
- " Cape Town International Airport | \n",
- " CPT | \n",
- " Cape Town | \n",
- " ZA | \n",
- " {'lat': '-33.96480179', 'lon': '18.60169983'} | \n",
- " SE-BD | \n",
- " Clear | \n",
- " 0 | \n",
- " 2018-01-01 18:27:00 | \n",
- "
\n",
- " \n",
- " Ptr3fWsBVUchcQJXWbLE | \n",
- " 190.636904 | \n",
- " False | \n",
- " Logstash Airways | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Cloudy | \n",
- " ... | \n",
- " 0.000000 | \n",
- " Venice Marco Polo Airport | \n",
- " VE05 | \n",
- " Venice | \n",
- " IT | \n",
- " {'lat': '45.505299', 'lon': '12.3519'} | \n",
- " IT-34 | \n",
- " Rain | \n",
- " 0 | \n",
- " 2018-01-01 17:11:14 | \n",
- "
\n",
- " \n",
- " P9r3fWsBVUchcQJXWbLE | \n",
- " 181.694216 | \n",
- " True | \n",
- " Kibana Airlines | \n",
- " Treviso-Sant'Angelo Airport | \n",
- " TV01 | \n",
- " Treviso | \n",
- " IT | \n",
- " {'lat': '45.648399', 'lon': '12.1944'} | \n",
- " IT-34 | \n",
- " Clear | \n",
- " ... | \n",
- " 222.749059 | \n",
- " Naples International Airport | \n",
- " NA01 | \n",
- " Naples | \n",
- " IT | \n",
- " {'lat': '40.886002', 'lon': '14.2908'} | \n",
- " IT-72 | \n",
- " Thunder & Lightning | \n",
- " 0 | \n",
- " 2018-01-01 10:33:28 | \n",
- "
\n",
- " \n",
- " QNr3fWsBVUchcQJXWbLE | \n",
- " 730.041778 | \n",
- " False | \n",
- " Kibana Airlines | \n",
- " Xi'an Xianyang International Airport | \n",
- " XIY | \n",
- " Xi'an | \n",
- " CN | \n",
- " {'lat': '34.447102', 'lon': '108.751999'} | \n",
- " SE-BD | \n",
- " Clear | \n",
- " ... | \n",
- " 785.779071 | \n",
- " Licenciado Benito Juarez International Airport | \n",
- " AICM | \n",
- " Mexico City | \n",
- " MX | \n",
- " {'lat': '19.4363', 'lon': '-99.072098'} | \n",
- " MX-DIF | \n",
- " Damaging Wind | \n",
- " 0 | \n",
- " 2018-01-01 05:13:00 | \n",
- "
\n",
- " \n",
- "
\n",
- "
5 rows × 27 columns
\n",
- "
"
- ],
- "text/plain": [
- " AvgTicketPrice Cancelled Carrier \\\n",
- "PNr3fWsBVUchcQJXWbLE 841.265642 False Kibana Airlines \n",
- "Pdr3fWsBVUchcQJXWbLE 882.982662 False Logstash Airways \n",
- "Ptr3fWsBVUchcQJXWbLE 190.636904 False Logstash Airways \n",
- "P9r3fWsBVUchcQJXWbLE 181.694216 True Kibana Airlines \n",
- "QNr3fWsBVUchcQJXWbLE 730.041778 False Kibana Airlines \n",
- "\n",
- " Dest \\\n",
- "PNr3fWsBVUchcQJXWbLE Sydney Kingsford Smith International Airport \n",
- "Pdr3fWsBVUchcQJXWbLE Venice Marco Polo Airport \n",
- "Ptr3fWsBVUchcQJXWbLE Venice Marco Polo Airport \n",
- "P9r3fWsBVUchcQJXWbLE Treviso-Sant'Angelo Airport \n",
- "QNr3fWsBVUchcQJXWbLE Xi'an Xianyang International Airport \n",
- "\n",
- " DestAirportID DestCityName DestCountry \\\n",
- "PNr3fWsBVUchcQJXWbLE SYD Sydney AU \n",
- "Pdr3fWsBVUchcQJXWbLE VE05 Venice IT \n",
- "Ptr3fWsBVUchcQJXWbLE VE05 Venice IT \n",
- "P9r3fWsBVUchcQJXWbLE TV01 Treviso IT \n",
- "QNr3fWsBVUchcQJXWbLE XIY Xi'an CN \n",
- "\n",
- " DestLocation DestRegion \\\n",
- "PNr3fWsBVUchcQJXWbLE {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n",
- "Pdr3fWsBVUchcQJXWbLE {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
- "Ptr3fWsBVUchcQJXWbLE {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
- "P9r3fWsBVUchcQJXWbLE {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n",
- "QNr3fWsBVUchcQJXWbLE {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
- "\n",
- " DestWeather ... FlightTimeMin \\\n",
- "PNr3fWsBVUchcQJXWbLE Rain ... 1030.770416 \n",
- "Pdr3fWsBVUchcQJXWbLE Sunny ... 464.389481 \n",
- "Ptr3fWsBVUchcQJXWbLE Cloudy ... 0.000000 \n",
- "P9r3fWsBVUchcQJXWbLE Clear ... 222.749059 \n",
- "QNr3fWsBVUchcQJXWbLE Clear ... 785.779071 \n",
- "\n",
- " Origin \\\n",
- "PNr3fWsBVUchcQJXWbLE Frankfurt am Main Airport \n",
- "Pdr3fWsBVUchcQJXWbLE Cape Town International Airport \n",
- "Ptr3fWsBVUchcQJXWbLE Venice Marco Polo Airport \n",
- "P9r3fWsBVUchcQJXWbLE Naples International Airport \n",
- "QNr3fWsBVUchcQJXWbLE Licenciado Benito Juarez International Airport \n",
- "\n",
- " OriginAirportID OriginCityName OriginCountry \\\n",
- "PNr3fWsBVUchcQJXWbLE FRA Frankfurt am Main DE \n",
- "Pdr3fWsBVUchcQJXWbLE CPT Cape Town ZA \n",
- "Ptr3fWsBVUchcQJXWbLE VE05 Venice IT \n",
- "P9r3fWsBVUchcQJXWbLE NA01 Naples IT \n",
- "QNr3fWsBVUchcQJXWbLE AICM Mexico City MX \n",
- "\n",
- " OriginLocation \\\n",
- "PNr3fWsBVUchcQJXWbLE {'lat': '50.033333', 'lon': '8.570556'} \n",
- "Pdr3fWsBVUchcQJXWbLE {'lat': '-33.96480179', 'lon': '18.60169983'} \n",
- "Ptr3fWsBVUchcQJXWbLE {'lat': '45.505299', 'lon': '12.3519'} \n",
- "P9r3fWsBVUchcQJXWbLE {'lat': '40.886002', 'lon': '14.2908'} \n",
- "QNr3fWsBVUchcQJXWbLE {'lat': '19.4363', 'lon': '-99.072098'} \n",
- "\n",
- " OriginRegion OriginWeather dayOfWeek \\\n",
- "PNr3fWsBVUchcQJXWbLE DE-HE Sunny 0 \n",
- "Pdr3fWsBVUchcQJXWbLE SE-BD Clear 0 \n",
- "Ptr3fWsBVUchcQJXWbLE IT-34 Rain 0 \n",
- "P9r3fWsBVUchcQJXWbLE IT-72 Thunder & Lightning 0 \n",
- "QNr3fWsBVUchcQJXWbLE MX-DIF Damaging Wind 0 \n",
- "\n",
- " timestamp \n",
- "PNr3fWsBVUchcQJXWbLE 2018-01-01 00:00:00 \n",
- "Pdr3fWsBVUchcQJXWbLE 2018-01-01 18:27:00 \n",
- "Ptr3fWsBVUchcQJXWbLE 2018-01-01 17:11:14 \n",
- "P9r3fWsBVUchcQJXWbLE 2018-01-01 10:33:28 \n",
- "QNr3fWsBVUchcQJXWbLE 2018-01-01 05:13:00 \n",
- "\n",
- "[5 rows x 27 columns]"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "ed_df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " AvgTicketPrice | \n",
- " DistanceKilometers | \n",
- " DistanceMiles | \n",
- " FlightDelayMin | \n",
- " FlightTimeHour | \n",
- " FlightTimeMin | \n",
- " dayOfWeek | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " count | \n",
- " 13059.000000 | \n",
- " 13059.000000 | \n",
- " 13059.000000 | \n",
- " 13059.000000 | \n",
- " 13059.000000 | \n",
- " 13059.000000 | \n",
- " 13059.000000 | \n",
- "
\n",
- " \n",
- " mean | \n",
- " 628.253689 | \n",
- " 7092.142457 | \n",
- " 4406.853010 | \n",
- " 47.335171 | \n",
- " 8.518797 | \n",
- " 511.127842 | \n",
- " 2.835975 | \n",
- "
\n",
- " \n",
- " std | \n",
- " 266.386661 | \n",
- " 4578.263193 | \n",
- " 2844.800855 | \n",
- " 96.743006 | \n",
- " 5.579019 | \n",
- " 334.741135 | \n",
- " 1.939365 | \n",
- "
\n",
- " \n",
- " min | \n",
- " 100.020531 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 25% | \n",
- " 410.008918 | \n",
- " 2470.545974 | \n",
- " 1535.126118 | \n",
- " 0.000000 | \n",
- " 4.195650 | \n",
- " 251.938710 | \n",
- " 1.000000 | \n",
- "
\n",
- " \n",
- " 50% | \n",
- " 640.387285 | \n",
- " 7612.072403 | \n",
- " 4729.922470 | \n",
- " 0.000000 | \n",
- " 8.385816 | \n",
- " 503.148975 | \n",
- " 3.000000 | \n",
- "
\n",
- " \n",
- " 75% | \n",
- " 842.277083 | \n",
- " 9735.660463 | \n",
- " 6049.583389 | \n",
- " 12.618243 | \n",
- " 12.008909 | \n",
- " 720.505705 | \n",
- " 4.197761 | \n",
- "
\n",
- " \n",
- " max | \n",
- " 1199.729004 | \n",
- " 19881.482422 | \n",
- " 12353.780273 | \n",
- " 360.000000 | \n",
- " 31.715034 | \n",
- " 1902.901978 | \n",
- " 6.000000 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin \\\n",
- "count 13059.000000 13059.000000 13059.000000 13059.000000 \n",
- "mean 628.253689 7092.142457 4406.853010 47.335171 \n",
- "std 266.386661 4578.263193 2844.800855 96.743006 \n",
- "min 100.020531 0.000000 0.000000 0.000000 \n",
- "25% 410.008918 2470.545974 1535.126118 0.000000 \n",
- "50% 640.387285 7612.072403 4729.922470 0.000000 \n",
- "75% 842.277083 9735.660463 6049.583389 12.618243 \n",
- "max 1199.729004 19881.482422 12353.780273 360.000000 \n",
- "\n",
- " FlightTimeHour FlightTimeMin dayOfWeek \n",
- "count 13059.000000 13059.000000 13059.000000 \n",
- "mean 8.518797 511.127842 2.835975 \n",
- "std 5.579019 334.741135 1.939365 \n",
- "min 0.000000 0.000000 0.000000 \n",
- "25% 4.195650 251.938710 1.000000 \n",
- "50% 8.385816 503.148975 3.000000 \n",
- "75% 12.008909 720.505705 4.197761 \n",
- "max 31.715034 1902.901978 6.000000 "
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "ed_df.describe()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(13059, 27)"
- ]
- },
- "execution_count": 11,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "ed_df.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Index(['AvgTicketPrice', 'Cancelled', 'Carrier', 'Dest', 'DestAirportID',\n",
- " 'DestCityName', 'DestCountry', 'DestLocation', 'DestRegion',\n",
- " 'DestWeather', 'DistanceKilometers', 'DistanceMiles', 'FlightDelay',\n",
- " 'FlightDelayMin', 'FlightDelayType', 'FlightNum', 'FlightTimeHour',\n",
- " 'FlightTimeMin', 'Origin', 'OriginAirportID', 'OriginCityName',\n",
- " 'OriginCountry', 'OriginLocation', 'OriginRegion', 'OriginWeather',\n",
- " 'dayOfWeek', 'timestamp'],\n",
- " dtype='object')"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "ed_df.columns"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "AvgTicketPrice float64\n",
- "Cancelled bool\n",
- "Carrier object\n",
- "Dest object\n",
- "DestAirportID object\n",
- "DestCityName object\n",
- "DestCountry object\n",
- "DestLocation object\n",
- "DestRegion object\n",
- "DestWeather object\n",
- "DistanceKilometers float64\n",
- "DistanceMiles float64\n",
- "FlightDelay bool\n",
- "FlightDelayMin int64\n",
- "FlightDelayType object\n",
- "FlightNum object\n",
- "FlightTimeHour float64\n",
- "FlightTimeMin float64\n",
- "Origin object\n",
- "OriginAirportID object\n",
- "OriginCityName object\n",
- "OriginCountry object\n",
- "OriginLocation object\n",
- "OriginRegion object\n",
- "OriginWeather object\n",
- "dayOfWeek int64\n",
- "timestamp datetime64[ns]\n",
- "dtype: object"
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "ed_df.dtypes"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "RangeIndex: 13059 entries, 0 to 13058\n",
- "Data columns (total 27 columns):\n",
- "AvgTicketPrice 13059 non-null float64\n",
- "Cancelled 13059 non-null bool\n",
- "Carrier 13059 non-null object\n",
- "Dest 13059 non-null object\n",
- "DestAirportID 13059 non-null object\n",
- "DestCityName 13059 non-null object\n",
- "DestCountry 13059 non-null object\n",
- "DestLocation 13059 non-null object\n",
- "DestRegion 13059 non-null object\n",
- "DestWeather 13059 non-null object\n",
- "DistanceKilometers 13059 non-null float64\n",
- "DistanceMiles 13059 non-null float64\n",
- "FlightDelay 13059 non-null bool\n",
- "FlightDelayMin 13059 non-null int64\n",
- "FlightDelayType 13059 non-null object\n",
- "FlightNum 13059 non-null object\n",
- "FlightTimeHour 13059 non-null float64\n",
- "FlightTimeMin 13059 non-null float64\n",
- "Origin 13059 non-null object\n",
- "OriginAirportID 13059 non-null object\n",
- "OriginCityName 13059 non-null object\n",
- "OriginCountry 13059 non-null object\n",
- "OriginLocation 13059 non-null object\n",
- "OriginRegion 13059 non-null object\n",
- "OriginWeather 13059 non-null object\n",
- "dayOfWeek 13059 non-null int64\n",
- "timestamp 13059 non-null datetime64[ns]\n",
- "dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)\n",
- "memory usage: 56.0 bytes\n"
- ]
- }
- ],
- "source": [
- "ed_df.info()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "AvgTicketPrice 13059\n",
- "Cancelled 13059\n",
- "Carrier 13059\n",
- "Dest 13059\n",
- "DestAirportID 13059\n",
- "DestCityName 13059\n",
- "DestCountry 13059\n",
- "DestLocation 13059\n",
- "DestRegion 13059\n",
- "DestWeather 13059\n",
- "DistanceKilometers 13059\n",
- "DistanceMiles 13059\n",
- "FlightDelay 13059\n",
- "FlightDelayMin 13059\n",
- "FlightDelayType 13059\n",
- "FlightNum 13059\n",
- "FlightTimeHour 13059\n",
- "FlightTimeMin 13059\n",
- "Origin 13059\n",
- "OriginAirportID 13059\n",
- "OriginCityName 13059\n",
- "OriginCountry 13059\n",
- "OriginLocation 13059\n",
- "OriginRegion 13059\n",
- "OriginWeather 13059\n",
- "dayOfWeek 13059\n",
- "timestamp 13059\n",
- "dtype: int64"
- ]
- },
- "execution_count": 15,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "ed_df.count()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "bool 2\n",
- "datetime64[ns] 1\n",
- "float64 5\n",
- "int64 2\n",
- "object 17\n",
- "dtype: int64"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "ed_df.get_dtype_counts()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- " Carrier Dest \\\n",
- "0 Kibana Airlines Sydney Kingsford Smith International Airport \n",
- "1 Logstash Airways Venice Marco Polo Airport \n",
- "2 Logstash Airways Venice Marco Polo Airport \n",
- "3 Kibana Airlines Treviso-Sant'Angelo Airport \n",
- "4 Kibana Airlines Xi'an Xianyang International Airport \n",
- "5 JetBeats Genoa Cristoforo Colombo Airport \n",
- "6 JetBeats Zurich Airport \n",
- "7 Kibana Airlines Ottawa Macdonald-Cartier International Airport \n",
- "8 Kibana Airlines Rajiv Gandhi International Airport \n",
- "9 Logstash Airways Treviso-Sant'Angelo Airport \n",
- "10 JetBeats Helsinki Vantaa Airport \n",
- "11 Logstash Airways Vienna International Airport \n",
- "12 Logstash Airways Shanghai Pudong International Airport \n",
- "13 Logstash Airways Ottawa Macdonald-Cartier International Airport \n",
- "14 Logstash Airways Luis Munoz Marin International Airport \n",
- "15 Kibana Airlines Cologne Bonn Airport \n",
- "16 Logstash Airways Venice Marco Polo Airport \n",
- "17 ES-Air Ministro Pistarini International Airport \n",
- "18 ES-Air Shanghai Pudong International Airport \n",
- "19 JetBeats Indira Gandhi International Airport \n",
- "20 JetBeats Wichita Mid Continent Airport \n",
- "21 ES-Air Ottawa Macdonald-Cartier International Airport \n",
- "22 JetBeats Itami Airport \n",
- "23 Logstash Airways Vienna International Airport \n",
- "24 Logstash Airways Charles de Gaulle International Airport \n",
- "25 ES-Air Narita International Airport \n",
- "26 Kibana Airlines Itami Airport \n",
- "27 JetBeats San Diego International Airport \n",
- "28 Kibana Airlines Verona Villafranca Airport \n",
- "29 Logstash Airways Zurich Airport \n",
- "... ... ... \n",
- "13029 Kibana Airlines Chengdu Shuangliu International Airport \n",
- "13030 ES-Air Narita International Airport \n",
- "13031 Kibana Airlines Narita International Airport \n",
- "13032 JetBeats Wichita Mid Continent Airport \n",
- "13033 Logstash Airways Sheremetyevo International Airport \n",
- "13034 ES-Air El Dorado International Airport \n",
- "13035 JetBeats Turin Airport \n",
- "13036 Kibana Airlines Winnipeg / James Armstrong Richardson Internat... \n",
- "13037 ES-Air Chengdu Shuangliu International Airport \n",
- "13038 JetBeats San Diego International Airport \n",
- "13039 ES-Air Ministro Pistarini International Airport \n",
- "13040 JetBeats Vienna International Airport \n",
- "13041 ES-Air Louisville International Standiford Field \n",
- "13042 Logstash Airways Ottawa Macdonald-Cartier International Airport \n",
- "13043 Kibana Airlines Shanghai Pudong International Airport \n",
- "13044 Kibana Airlines Zurich Airport \n",
- "13045 Kibana Airlines London Heathrow Airport \n",
- "13046 Kibana Airlines Ottawa Macdonald-Cartier International Airport \n",
- "13047 ES-Air Manchester Airport \n",
- "13048 Logstash Airways Comodoro Arturo Merino Benitez International A... \n",
- "13049 Kibana Airlines Olenya Air Base \n",
- "13050 Logstash Airways Sheremetyevo International Airport \n",
- "13051 JetBeats Indira Gandhi International Airport \n",
- "13052 Logstash Airways Stockholm-Arlanda Airport \n",
- "13053 Kibana Airlines Rochester International Airport \n",
- "13054 Logstash Airways New Chitose Airport \n",
- "13055 Logstash Airways San Antonio International Airport \n",
- "13056 JetBeats Zurich Airport \n",
- "13057 ES-Air Vienna International Airport \n",
- "13058 Kibana Airlines Rajiv Gandhi International Airport \n",
- "\n",
- " OriginRegion \n",
- "0 DE-HE \n",
- "1 SE-BD \n",
- "2 IT-34 \n",
- "3 IT-72 \n",
- "4 MX-DIF \n",
- "5 CA-AB \n",
- "6 CH-ZH \n",
- "7 IT-62 \n",
- "8 IT-25 \n",
- "9 RU-MOS \n",
- "10 US-NM \n",
- "11 IT-34 \n",
- "12 MX-DIF \n",
- "13 IT-72 \n",
- "14 IT-62 \n",
- "15 SE-BD \n",
- "16 MX-DIF \n",
- "17 US-OH \n",
- "18 RU-MUR \n",
- "19 US-WY \n",
- "20 US-PA \n",
- "21 US-NJ \n",
- "22 DK-84 \n",
- "23 US-WA \n",
- "24 DE-BE \n",
- "25 GB-ENG \n",
- "26 FI-ES \n",
- "27 US-AZ \n",
- "28 SE-BD \n",
- "29 US-OK \n",
- "... ... \n",
- "13029 SE-BD \n",
- "13030 IT-82 \n",
- "13031 US-KY \n",
- "13032 US-WA \n",
- "13033 US-OR \n",
- "13034 SE-BD \n",
- "13035 US-NC \n",
- "13036 IT-34 \n",
- "13037 IT-82 \n",
- "13038 US-NY \n",
- "13039 SE-BD \n",
- "13040 CA-ON \n",
- "13041 IT-25 \n",
- "13042 GB-ENG \n",
- "13043 SE-BD \n",
- "13044 US-FL \n",
- "13045 SE-BD \n",
- "13046 MX-DIF \n",
- "13047 SE-BD \n",
- "13048 SE-BD \n",
- "13049 SE-BD \n",
- "13050 IT-52 \n",
- "13051 IT-88 \n",
- "13052 GB-ENG \n",
- "13053 SE-BD \n",
- "13054 SE-BD \n",
- "13055 SE-BD \n",
- "13056 CH-ZH \n",
- "13057 RU-AMU \n",
- "13058 SE-BD \n",
- "\n",
- "[13059 rows x 3 columns]"
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "ed_df['Carrier', 'Dest', 'OriginRegion']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {},
- "outputs": [],
- "source": [
- "ed_df2 = ed_df['Carrier', 'Dest', 'OriginRegion']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "metadata": {
- "scrolled": true
- },
- "outputs": [],
- "source": [
- "t = ed_df2.head(10)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 23,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "Index: 10 entries, PNr3fWsBVUchcQJXWbLE to Rdr3fWsBVUchcQJXWbLE\n",
- "Data columns (total 3 columns):\n",
- "Carrier 10 non-null object\n",
- "Dest 10 non-null object\n",
- "OriginRegion 10 non-null object\n",
- "dtypes: object(3)\n",
- "memory usage: 320.0+ bytes\n"
- ]
- }
- ],
- "source": [
- "t.info()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {},
- "outputs": [],
- "source": [
- "tt = t.to_sparse()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 25,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "Index: 10 entries, PNr3fWsBVUchcQJXWbLE to Rdr3fWsBVUchcQJXWbLE\n",
- "Data columns (total 3 columns):\n",
- "Carrier 10 non-null Sparse[object, nan]\n",
- "Dest 10 non-null Sparse[object, nan]\n",
- "OriginRegion 10 non-null Sparse[object, nan]\n",
- "dtypes: Sparse[object, nan](3)\n",
- "memory usage: 344.0+ bytes\n"
- ]
- }
- ],
- "source": [
- "tt.info()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 27,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Index(['PNr3fWsBVUchcQJXWbLE', 'Pdr3fWsBVUchcQJXWbLE', 'Ptr3fWsBVUchcQJXWbLE',\n",
- " 'P9r3fWsBVUchcQJXWbLE', 'QNr3fWsBVUchcQJXWbLE', 'Qdr3fWsBVUchcQJXWbLE',\n",
- " 'Qtr3fWsBVUchcQJXWbLE', 'Q9r3fWsBVUchcQJXWbLE', 'RNr3fWsBVUchcQJXWbLE',\n",
- " 'Rdr3fWsBVUchcQJXWbLE'],\n",
- " dtype='object')"
- ]
- },
- "execution_count": 27,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "tt.index"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 28,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- }
- ],
- "source": [
- "print(type(pd_df.columns))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "RangeIndex: 13059 entries, 0 to 13058\n",
- "Data columns (total 3 columns):\n",
- "Carrier 13059 non-null object\n",
- "Dest 13059 non-null object\n",
- "OriginRegion 13059 non-null object\n",
- "dtypes: object(3)\n",
- "memory usage: 56.0 bytes\n"
- ]
- }
- ],
- "source": [
- "ed_df2.info()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- " Carrier Dest \\\n",
- "0 Kibana Airlines Sydney Kingsford Smith International Airport \n",
- "1 Logstash Airways Venice Marco Polo Airport \n",
- "2 Logstash Airways Venice Marco Polo Airport \n",
- "3 Kibana Airlines Treviso-Sant'Angelo Airport \n",
- "4 Kibana Airlines Xi'an Xianyang International Airport \n",
- "5 JetBeats Genoa Cristoforo Colombo Airport \n",
- "6 JetBeats Zurich Airport \n",
- "7 Kibana Airlines Ottawa Macdonald-Cartier International Airport \n",
- "8 Kibana Airlines Rajiv Gandhi International Airport \n",
- "9 Logstash Airways Treviso-Sant'Angelo Airport \n",
- "10 JetBeats Helsinki Vantaa Airport \n",
- "11 Logstash Airways Vienna International Airport \n",
- "12 Logstash Airways Shanghai Pudong International Airport \n",
- "13 Logstash Airways Ottawa Macdonald-Cartier International Airport \n",
- "14 Logstash Airways Luis Munoz Marin International Airport \n",
- "15 Kibana Airlines Cologne Bonn Airport \n",
- "16 Logstash Airways Venice Marco Polo Airport \n",
- "17 ES-Air Ministro Pistarini International Airport \n",
- "18 ES-Air Shanghai Pudong International Airport \n",
- "19 JetBeats Indira Gandhi International Airport \n",
- "20 JetBeats Wichita Mid Continent Airport \n",
- "21 ES-Air Ottawa Macdonald-Cartier International Airport \n",
- "22 JetBeats Itami Airport \n",
- "23 Logstash Airways Vienna International Airport \n",
- "24 Logstash Airways Charles de Gaulle International Airport \n",
- "25 ES-Air Narita International Airport \n",
- "26 Kibana Airlines Itami Airport \n",
- "27 JetBeats San Diego International Airport \n",
- "28 Kibana Airlines Verona Villafranca Airport \n",
- "29 Logstash Airways Zurich Airport \n",
- "... ... ... \n",
- "13029 Kibana Airlines Chengdu Shuangliu International Airport \n",
- "13030 ES-Air Narita International Airport \n",
- "13031 Kibana Airlines Narita International Airport \n",
- "13032 JetBeats Wichita Mid Continent Airport \n",
- "13033 Logstash Airways Sheremetyevo International Airport \n",
- "13034 ES-Air El Dorado International Airport \n",
- "13035 JetBeats Turin Airport \n",
- "13036 Kibana Airlines Winnipeg / James Armstrong Richardson Internat... \n",
- "13037 ES-Air Chengdu Shuangliu International Airport \n",
- "13038 JetBeats San Diego International Airport \n",
- "13039 ES-Air Ministro Pistarini International Airport \n",
- "13040 JetBeats Vienna International Airport \n",
- "13041 ES-Air Louisville International Standiford Field \n",
- "13042 Logstash Airways Ottawa Macdonald-Cartier International Airport \n",
- "13043 Kibana Airlines Shanghai Pudong International Airport \n",
- "13044 Kibana Airlines Zurich Airport \n",
- "13045 Kibana Airlines London Heathrow Airport \n",
- "13046 Kibana Airlines Ottawa Macdonald-Cartier International Airport \n",
- "13047 ES-Air Manchester Airport \n",
- "13048 Logstash Airways Comodoro Arturo Merino Benitez International A... \n",
- "13049 Kibana Airlines Olenya Air Base \n",
- "13050 Logstash Airways Sheremetyevo International Airport \n",
- "13051 JetBeats Indira Gandhi International Airport \n",
- "13052 Logstash Airways Stockholm-Arlanda Airport \n",
- "13053 Kibana Airlines Rochester International Airport \n",
- "13054 Logstash Airways New Chitose Airport \n",
- "13055 Logstash Airways San Antonio International Airport \n",
- "13056 JetBeats Zurich Airport \n",
- "13057 ES-Air Vienna International Airport \n",
- "13058 Kibana Airlines Rajiv Gandhi International Airport \n",
- "\n",
- " OriginRegion \n",
- "0 DE-HE \n",
- "1 SE-BD \n",
- "2 IT-34 \n",
- "3 IT-72 \n",
- "4 MX-DIF \n",
- "5 CA-AB \n",
- "6 CH-ZH \n",
- "7 IT-62 \n",
- "8 IT-25 \n",
- "9 RU-MOS \n",
- "10 US-NM \n",
- "11 IT-34 \n",
- "12 MX-DIF \n",
- "13 IT-72 \n",
- "14 IT-62 \n",
- "15 SE-BD \n",
- "16 MX-DIF \n",
- "17 US-OH \n",
- "18 RU-MUR \n",
- "19 US-WY \n",
- "20 US-PA \n",
- "21 US-NJ \n",
- "22 DK-84 \n",
- "23 US-WA \n",
- "24 DE-BE \n",
- "25 GB-ENG \n",
- "26 FI-ES \n",
- "27 US-AZ \n",
- "28 SE-BD \n",
- "29 US-OK \n",
- "... ... \n",
- "13029 SE-BD \n",
- "13030 IT-82 \n",
- "13031 US-KY \n",
- "13032 US-WA \n",
- "13033 US-OR \n",
- "13034 SE-BD \n",
- "13035 US-NC \n",
- "13036 IT-34 \n",
- "13037 IT-82 \n",
- "13038 US-NY \n",
- "13039 SE-BD \n",
- "13040 CA-ON \n",
- "13041 IT-25 \n",
- "13042 GB-ENG \n",
- "13043 SE-BD \n",
- "13044 US-FL \n",
- "13045 SE-BD \n",
- "13046 MX-DIF \n",
- "13047 SE-BD \n",
- "13048 SE-BD \n",
- "13049 SE-BD \n",
- "13050 IT-52 \n",
- "13051 IT-88 \n",
- "13052 GB-ENG \n",
- "13053 SE-BD \n",
- "13054 SE-BD \n",
- "13055 SE-BD \n",
- "13056 CH-ZH \n",
- "13057 RU-AMU \n",
- "13058 SE-BD \n",
- "\n",
- "[13059 rows x 3 columns]"
- ]
- },
- "execution_count": 21,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "ed_df2"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.8"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/eland/utils.py b/eland/utils.py
index 074785b..a8e55a1 100644
--- a/eland/utils.py
+++ b/eland/utils.py
@@ -2,9 +2,11 @@ from eland import Client
from eland import DataFrame
from eland import Mappings
+
def read_es(es_params, index_pattern):
return DataFrame(client=es_params, index_pattern=index_pattern)
+
def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size=10000, refresh=False):
"""
Append a pandas DataFrame to an Elasticsearch index.
@@ -45,8 +47,8 @@ def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size=
elif if_exists == "replace":
client.indices().delete(destination_index)
client.indices().create(destination_index, mapping)
- #elif if_exists == "append":
- # TODO validate mapping is compatible
+ # elif if_exists == "append":
+ # TODO validate mapping is compatible
else:
client.indices().create(destination_index, mapping)
@@ -70,4 +72,3 @@ def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size=
actions = []
client.bulk(actions, refresh=refresh)
-