mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Merge pull request #37 from stevedodson/master
Resolve DataFrame.query issues + more docs
This commit is contained in:
commit
dc2b1acbc2
@ -57,7 +57,10 @@ except ImportError:
|
|||||||
pd = None
|
pd = None
|
||||||
'''
|
'''
|
||||||
|
|
||||||
extlinks = {'pandas_docs': ('https://pandas.pydata.org/pandas-docs/version/0.25.1/reference/api/%s.html', '')}
|
extlinks = {
|
||||||
|
'pandas_api_docs': ('https://pandas.pydata.org/pandas-docs/version/0.25.1/reference/api/%s.html', ''),
|
||||||
|
'pandas_user_guide': ('https://pandas.pydata.org/pandas-docs/stable/user_guide/%s.html', 'Pandas User Guide/')
|
||||||
|
}
|
||||||
|
|
||||||
numpydoc_attributes_as_param_list = False
|
numpydoc_attributes_as_param_list = False
|
||||||
numpydoc_show_class_members = False
|
numpydoc_show_class_members = False
|
||||||
@ -85,7 +88,8 @@ exclude_patterns = []
|
|||||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||||
# a list of builtin themes.
|
# a list of builtin themes.
|
||||||
#
|
#
|
||||||
html_theme = 'sphinx_rtd_theme'
|
#html_theme = 'sphinx_rtd_theme'
|
||||||
|
html_theme = "pandas_sphinx_theme"
|
||||||
|
|
||||||
# Add any paths that contain custom static files (such as style sheets) here,
|
# Add any paths that contain custom static files (such as style sheets) here,
|
||||||
# relative to this directory. They are copied after the builtin static files,
|
# relative to this directory. They are copied after the builtin static files,
|
||||||
|
6
docs/source/reference/api/eland.DataFrame.info_es.rst
Normal file
6
docs/source/reference/api/eland.DataFrame.info_es.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.DataFrame.info_es
|
||||||
|
=======================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: DataFrame.info_es
|
6
docs/source/reference/api/eland.DataFrame.keys.rst
Normal file
6
docs/source/reference/api/eland.DataFrame.keys.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.DataFrame.keys
|
||||||
|
====================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: DataFrame.keys
|
6
docs/source/reference/api/eland.DataFrame.max.rst
Normal file
6
docs/source/reference/api/eland.DataFrame.max.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.DataFrame.max
|
||||||
|
===================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: DataFrame.max
|
6
docs/source/reference/api/eland.DataFrame.mean.rst
Normal file
6
docs/source/reference/api/eland.DataFrame.mean.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.DataFrame.mean
|
||||||
|
====================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: DataFrame.mean
|
6
docs/source/reference/api/eland.DataFrame.min.rst
Normal file
6
docs/source/reference/api/eland.DataFrame.min.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.DataFrame.min
|
||||||
|
===================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: DataFrame.min
|
6
docs/source/reference/api/eland.DataFrame.nunique.rst
Normal file
6
docs/source/reference/api/eland.DataFrame.nunique.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.DataFrame.nunique
|
||||||
|
=======================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: DataFrame.nunique
|
6
docs/source/reference/api/eland.DataFrame.query.rst
Normal file
6
docs/source/reference/api/eland.DataFrame.query.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.DataFrame.query
|
||||||
|
=====================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: DataFrame.query
|
6
docs/source/reference/api/eland.DataFrame.sum.rst
Normal file
6
docs/source/reference/api/eland.DataFrame.sum.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.DataFrame.sum
|
||||||
|
===================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: DataFrame.sum
|
@ -31,8 +31,10 @@ Indexing, iteration
|
|||||||
:toctree: api/
|
:toctree: api/
|
||||||
|
|
||||||
DataFrame.head
|
DataFrame.head
|
||||||
|
DataFrame.keys
|
||||||
DataFrame.tail
|
DataFrame.tail
|
||||||
DataFrame.get
|
DataFrame.get
|
||||||
|
DataFrame.query
|
||||||
|
|
||||||
Function application, GroupBy & window
|
Function application, GroupBy & window
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
@ -52,6 +54,11 @@ Computations / descriptive stats
|
|||||||
DataFrame.count
|
DataFrame.count
|
||||||
DataFrame.describe
|
DataFrame.describe
|
||||||
DataFrame.info
|
DataFrame.info
|
||||||
|
DataFrame.max
|
||||||
|
DataFrame.mean
|
||||||
|
DataFrame.min
|
||||||
|
DataFrame.sum
|
||||||
|
DataFrame.nunique
|
||||||
|
|
||||||
Reindexing / selection / label manipulation
|
Reindexing / selection / label manipulation
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
@ -74,3 +81,11 @@ Serialization / IO / conversion
|
|||||||
|
|
||||||
DataFrame.info
|
DataFrame.info
|
||||||
|
|
||||||
|
Elasticsearch utilities
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
.. autosummary::
|
||||||
|
:toctree: api/
|
||||||
|
|
||||||
|
DataFrame.info_es
|
||||||
|
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@ from io import StringIO
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import six
|
import six
|
||||||
|
from pandas.core.computation.eval import eval
|
||||||
from pandas.core.common import apply_if_callable, is_bool_indexer
|
from pandas.core.common import apply_if_callable, is_bool_indexer
|
||||||
from pandas.core.dtypes.common import is_list_like
|
from pandas.core.dtypes.common import is_list_like
|
||||||
from pandas.core.indexing import check_bool_indexer
|
from pandas.core.indexing import check_bool_indexer
|
||||||
@ -41,7 +42,7 @@ class DataFrame(NDFrame):
|
|||||||
|
|
||||||
See Also
|
See Also
|
||||||
--------
|
--------
|
||||||
:pandas_docs:`pandas.DataFrame`
|
:pandas_api_docs:`pandas.DataFrame`
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@ -119,11 +120,12 @@ class DataFrame(NDFrame):
|
|||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
|
pandas.Index
|
||||||
Elasticsearch field names as pandas.Index
|
Elasticsearch field names as pandas.Index
|
||||||
|
|
||||||
See Also
|
See Also
|
||||||
--------
|
--------
|
||||||
:pandas_docs:`pandas.DataFrame.columns`
|
:pandas_api_docs:`pandas.DataFrame.columns`
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@ -153,7 +155,7 @@ class DataFrame(NDFrame):
|
|||||||
|
|
||||||
See Also
|
See Also
|
||||||
--------
|
--------
|
||||||
:pandas_docs:`pandas.DataFrame.empty`
|
:pandas_api_docs:`pandas.DataFrame.empty`
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@ -183,7 +185,7 @@ class DataFrame(NDFrame):
|
|||||||
|
|
||||||
See Also
|
See Also
|
||||||
--------
|
--------
|
||||||
:pandas_docs:`pandas.DataFrame.head`
|
:pandas_api_docs:`pandas.DataFrame.head`
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@ -218,7 +220,7 @@ class DataFrame(NDFrame):
|
|||||||
|
|
||||||
See Also
|
See Also
|
||||||
--------
|
--------
|
||||||
:pandas_docs:`pandas.DataFrame.tail`
|
:pandas_api_docs:`pandas.DataFrame.tail`
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@ -304,7 +306,7 @@ class DataFrame(NDFrame):
|
|||||||
|
|
||||||
See Also
|
See Also
|
||||||
--------
|
--------
|
||||||
:pandas_docs:`pandas.DataFrame.count`
|
:pandas_api_docs:`pandas.DataFrame.count`
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@ -318,11 +320,57 @@ class DataFrame(NDFrame):
|
|||||||
|
|
||||||
def info_es(self):
|
def info_es(self):
|
||||||
"""
|
"""
|
||||||
|
A debug summary of an eland DataFrame internals.
|
||||||
|
|
||||||
|
This includes the Elasticsearch search queries and query compiler task list.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
None
|
str
|
||||||
This method prints a debug summary of the task list Elasticsearch
|
A debug summary of an eland DataFrame internals.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> df = ed.DataFrame('localhost', 'flights')
|
||||||
|
>>> df = df[(df.OriginAirportID == 'AMS') & (df.FlightDelayMin > 60)]
|
||||||
|
>>> df = df[['timestamp', 'OriginAirportID', 'DestAirportID', 'FlightDelayMin']]
|
||||||
|
>>> df = df.tail()
|
||||||
|
>>> df
|
||||||
|
timestamp OriginAirportID DestAirportID FlightDelayMin
|
||||||
|
12608 2018-02-10 01:20:52 AMS CYEG 120
|
||||||
|
12720 2018-02-10 14:09:40 AMS BHM 255
|
||||||
|
12725 2018-02-10 00:53:01 AMS ATL 360
|
||||||
|
12823 2018-02-10 15:41:20 AMS NGO 120
|
||||||
|
12907 2018-02-11 20:08:25 AMS LIM 225
|
||||||
|
<BLANKLINE>
|
||||||
|
[5 rows x 4 columns]
|
||||||
|
>>> print(df.info_es())
|
||||||
|
index_pattern: flights
|
||||||
|
Index:
|
||||||
|
index_field: _id
|
||||||
|
is_source_field: False
|
||||||
|
Mappings:
|
||||||
|
capabilities: _source es_dtype pd_dtype searchable aggregatable
|
||||||
|
AvgTicketPrice True float float64 True True
|
||||||
|
Cancelled True boolean bool True True
|
||||||
|
Carrier True keyword object True True
|
||||||
|
Dest True keyword object True True
|
||||||
|
DestAirportID True keyword object True True
|
||||||
|
... ... ... ... ... ...
|
||||||
|
OriginLocation True geo_point object True True
|
||||||
|
OriginRegion True keyword object True True
|
||||||
|
OriginWeather True keyword object True True
|
||||||
|
dayOfWeek True integer int64 True True
|
||||||
|
timestamp True date datetime64[ns] True True
|
||||||
|
<BLANKLINE>
|
||||||
|
[27 rows x 5 columns]
|
||||||
|
Operations:
|
||||||
|
tasks: [('boolean_filter', {'bool': {'must': [{'term': {'OriginAirportID': 'AMS'}}, {'range': {'FlightDelayMin': {'gt': 60}}}]}}), ('columns', ['timestamp', 'OriginAirportID', 'DestAirportID', 'FlightDelayMin']), ('tail', ('_doc', 5))]
|
||||||
|
size: 5
|
||||||
|
sort_params: _doc:desc
|
||||||
|
columns: ['timestamp', 'OriginAirportID', 'DestAirportID', 'FlightDelayMin']
|
||||||
|
post_processing: ['sort_index']
|
||||||
|
<BLANKLINE>
|
||||||
"""
|
"""
|
||||||
buf = StringIO()
|
buf = StringIO()
|
||||||
|
|
||||||
@ -350,7 +398,7 @@ class DataFrame(NDFrame):
|
|||||||
This method prints information about a DataFrame including
|
This method prints information about a DataFrame including
|
||||||
the index dtype and column dtypes, non-null values and memory usage.
|
the index dtype and column dtypes, non-null values and memory usage.
|
||||||
|
|
||||||
See :pandas_docs:`pandas.DataFrame.info` for full details.
|
See :pandas_api_docs:`pandas.DataFrame.info` for full details.
|
||||||
|
|
||||||
Notes
|
Notes
|
||||||
-----
|
-----
|
||||||
@ -368,7 +416,7 @@ class DataFrame(NDFrame):
|
|||||||
customer_first_name 4675 non-null object
|
customer_first_name 4675 non-null object
|
||||||
geoip.city_name 4094 non-null object
|
geoip.city_name 4094 non-null object
|
||||||
dtypes: object(2)
|
dtypes: object(2)
|
||||||
memory usage: 96.0 bytes
|
memory usage: ...
|
||||||
"""
|
"""
|
||||||
if buf is None: # pragma: no cover
|
if buf is None: # pragma: no cover
|
||||||
buf = sys.stdout
|
buf = sys.stdout
|
||||||
@ -559,6 +607,26 @@ class DataFrame(NDFrame):
|
|||||||
result = _buf.getvalue()
|
result = _buf.getvalue()
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def __getattr__(self, key):
|
||||||
|
"""After regular attribute access, looks up the name in the columns
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
key: str
|
||||||
|
Attribute name.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
The value of the attribute.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
return object.__getattribute__(self, key)
|
||||||
|
except AttributeError as e:
|
||||||
|
if key in self.columns:
|
||||||
|
return self[key]
|
||||||
|
raise e
|
||||||
|
|
||||||
|
|
||||||
def _getitem(self, key):
|
def _getitem(self, key):
|
||||||
"""Get the column specified by key for this DataFrame.
|
"""Get the column specified by key for this DataFrame.
|
||||||
|
|
||||||
@ -695,7 +763,7 @@ class DataFrame(NDFrame):
|
|||||||
"""
|
"""
|
||||||
Return a subset of the DataFrame's columns based on the column dtypes.
|
Return a subset of the DataFrame's columns based on the column dtypes.
|
||||||
|
|
||||||
Compatible with :pandas_docs:`pandas.DataFrame.select_dtypes`
|
Compatible with :pandas_api_docs:`pandas.DataFrame.select_dtypes`
|
||||||
"""
|
"""
|
||||||
empty_df = self._empty_pd_df()
|
empty_df = self._empty_pd_df()
|
||||||
|
|
||||||
@ -720,6 +788,16 @@ class DataFrame(NDFrame):
|
|||||||
return num_rows, num_columns
|
return num_rows, num_columns
|
||||||
|
|
||||||
def keys(self):
|
def keys(self):
|
||||||
|
"""
|
||||||
|
Return columns
|
||||||
|
|
||||||
|
See :pandas_api_docs:`pandas.DataFrame.keys`
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
pandas.Index
|
||||||
|
Elasticsearch field names as pandas.Index
|
||||||
|
"""
|
||||||
return self.columns
|
return self.columns
|
||||||
|
|
||||||
def aggregate(self, func, axis=0, *args, **kwargs):
|
def aggregate(self, func, axis=0, *args, **kwargs):
|
||||||
@ -758,7 +836,7 @@ class DataFrame(NDFrame):
|
|||||||
|
|
||||||
See Also
|
See Also
|
||||||
--------
|
--------
|
||||||
:pandas_docs:`pandas.DataFrame.aggregate`
|
:pandas_api_docs:`pandas.DataFrame.aggregate`
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@ -788,19 +866,49 @@ class DataFrame(NDFrame):
|
|||||||
|
|
||||||
hist = gfx.ed_hist_frame
|
hist = gfx.ed_hist_frame
|
||||||
|
|
||||||
def query(self, expr, inplace=False, **kwargs):
|
def query(self, expr):
|
||||||
"""Queries the Dataframe with a boolean expression
|
"""
|
||||||
|
Query the columns of a DataFrame with a boolean expression.
|
||||||
|
|
||||||
Returns:
|
TODO - add additional pandas arguments
|
||||||
A new DataFrame if inplace=False
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
expr: str
|
||||||
|
A boolean expression
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
eland.DataFrame:
|
||||||
|
DataFrame populated by results of the query
|
||||||
|
|
||||||
|
TODO - add link to eland user guide
|
||||||
|
|
||||||
|
See Also
|
||||||
|
--------
|
||||||
|
:pandas_api_docs:`pandas.DataFrame.query`
|
||||||
|
:pandas_user_guide:`indexing`
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> df = ed.DataFrame('localhost', 'flights')
|
||||||
|
>>> df = df.query('FlightDelayMin > 60')
|
||||||
|
>>> df.info()
|
||||||
"""
|
"""
|
||||||
if isinstance(expr, BooleanFilter):
|
if isinstance(expr, BooleanFilter):
|
||||||
return DataFrame(
|
return DataFrame(
|
||||||
query_compiler=self._query_compiler._update_query(BooleanFilter(expr))
|
query_compiler=self._query_compiler._update_query(BooleanFilter(expr))
|
||||||
)
|
)
|
||||||
elif isinstance(expr, six.string_types):
|
elif isinstance(expr, six.string_types):
|
||||||
|
column_resolver = {}
|
||||||
|
for key in self.keys():
|
||||||
|
column_resolver[key] = self.get(key)
|
||||||
|
# Create fake resolvers - index resolver is empty
|
||||||
|
resolvers = column_resolver, {}
|
||||||
|
# Use pandas eval to parse query - TODO validate this further
|
||||||
|
filter = eval(expr, target=self, resolvers=tuple(tuple(resolvers)))
|
||||||
return DataFrame(
|
return DataFrame(
|
||||||
query_compiler=self._query_compiler._update_query(ScriptFilter(expr))
|
query_compiler=self._query_compiler._update_query(filter)
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError(expr, type(expr))
|
raise NotImplementedError(expr, type(expr))
|
||||||
@ -820,7 +928,7 @@ class DataFrame(NDFrame):
|
|||||||
|
|
||||||
See Also
|
See Also
|
||||||
--------
|
--------
|
||||||
:pandas_docs:`pandas.DataFrame.get`
|
:pandas_api_docs:`pandas.DataFrame.get`
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
|
@ -58,5 +58,5 @@ class Index:
|
|||||||
|
|
||||||
def info_es(self, buf):
|
def info_es(self, buf):
|
||||||
buf.write("Index:\n")
|
buf.write("Index:\n")
|
||||||
buf.write("\tindex_field: {0}\n".format(self.index_field))
|
buf.write(" index_field: {0}\n".format(self.index_field))
|
||||||
buf.write("\tis_source_field: {0}\n".format(self.is_source_field))
|
buf.write(" is_source_field: {0}\n".format(self.is_source_field))
|
||||||
|
@ -408,6 +408,44 @@ class Mappings:
|
|||||||
|
|
||||||
return is_source_field
|
return is_source_field
|
||||||
|
|
||||||
|
def aggregatable_columns(self, columns=None):
|
||||||
|
"""
|
||||||
|
Return a dict of aggregatable columns from all columns or columns list
|
||||||
|
{'customer_full_name': 'customer_full_name.keyword', ...}
|
||||||
|
|
||||||
|
Logic here is that column names are '_source' fields and keyword fields
|
||||||
|
may be nested beneath the field. E.g.
|
||||||
|
customer_full_name: text
|
||||||
|
customer_full_name.keyword: keyword
|
||||||
|
|
||||||
|
customer_full_name.keyword is the aggregatable field for customer_full_name
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
dict
|
||||||
|
e.g. {'customer_full_name': 'customer_full_name.keyword', ...}
|
||||||
|
"""
|
||||||
|
if columns is None:
|
||||||
|
columns = self.source_fields()
|
||||||
|
|
||||||
|
aggregatables = {}
|
||||||
|
|
||||||
|
for column in columns:
|
||||||
|
capabilities = self.field_capabilities(column)
|
||||||
|
if capabilities['aggregatable']:
|
||||||
|
aggregatables[column] = column
|
||||||
|
else:
|
||||||
|
# Try 'column.keyword'
|
||||||
|
column_keyword = column + '.keyword'
|
||||||
|
capabilities = self.field_capabilities(column_keyword)
|
||||||
|
if capabilities['aggregatable']:
|
||||||
|
aggregatables[column_keyword] = column
|
||||||
|
else:
|
||||||
|
# Aggregations not supported for this field
|
||||||
|
raise ValueError("Aggregations not supported for ", column)
|
||||||
|
|
||||||
|
return aggregatables
|
||||||
|
|
||||||
def numeric_source_fields(self, columns, include_bool=True):
|
def numeric_source_fields(self, columns, include_bool=True):
|
||||||
"""
|
"""
|
||||||
Returns
|
Returns
|
||||||
@ -471,4 +509,4 @@ class Mappings:
|
|||||||
|
|
||||||
def info_es(self, buf):
|
def info_es(self, buf):
|
||||||
buf.write("Mappings:\n")
|
buf.write("Mappings:\n")
|
||||||
buf.write("\tcapabilities: {0}\n".format(self._mappings_capabilities))
|
buf.write(" capabilities: {0}\n".format(self._mappings_capabilities))
|
||||||
|
191
eland/ndframe.py
191
eland/ndframe.py
@ -66,7 +66,7 @@ class NDFrame:
|
|||||||
|
|
||||||
See Also
|
See Also
|
||||||
--------
|
--------
|
||||||
:pandas_docs:`pandas.DataFrame.index`
|
:pandas_api_docs:`pandas.DataFrame.index`
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@ -92,7 +92,7 @@ class NDFrame:
|
|||||||
|
|
||||||
See Also
|
See Also
|
||||||
--------
|
--------
|
||||||
:pandas_docs:`pandas.DataFrame.dtypes`
|
:pandas_api_docs:`pandas.DataFrame.dtypes`
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@ -125,22 +125,6 @@ class NDFrame:
|
|||||||
def __getitem__(self, key):
|
def __getitem__(self, key):
|
||||||
return self._getitem(key)
|
return self._getitem(key)
|
||||||
|
|
||||||
def __getattr__(self, key):
|
|
||||||
"""After regular attribute access, looks up the name in the columns
|
|
||||||
|
|
||||||
Args:
|
|
||||||
key (str): Attribute name.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
The value of the attribute.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
return object.__getattribute__(self, key)
|
|
||||||
except AttributeError as e:
|
|
||||||
if key in self.columns:
|
|
||||||
return self[key]
|
|
||||||
raise e
|
|
||||||
|
|
||||||
def __sizeof__(self):
|
def __sizeof__(self):
|
||||||
# Don't default to pandas, just return approximation TODO - make this more accurate
|
# Don't default to pandas, just return approximation TODO - make this more accurate
|
||||||
return sys.getsizeof(self._query_compiler)
|
return sys.getsizeof(self._query_compiler)
|
||||||
@ -190,7 +174,7 @@ class NDFrame:
|
|||||||
|
|
||||||
See Also
|
See Also
|
||||||
--------
|
--------
|
||||||
:pandas_docs:`pandas.DataFrame.drop`
|
:pandas_api_docs:`pandas.DataFrame.drop`
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@ -299,26 +283,185 @@ class NDFrame:
|
|||||||
)
|
)
|
||||||
return self._create_or_update_from_compiler(new_query_compiler, inplace)
|
return self._create_or_update_from_compiler(new_query_compiler, inplace)
|
||||||
|
|
||||||
# TODO implement arguments
|
def mean(self, numeric_only=True):
|
||||||
def mean(self):
|
"""
|
||||||
|
Return mean value for each numeric column
|
||||||
|
|
||||||
|
TODO - implement remainder of pandas arguments
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
pandas.Series
|
||||||
|
mean value for each numeric column
|
||||||
|
|
||||||
|
See Also
|
||||||
|
--------
|
||||||
|
:pandas_api_docs:`pandas.DataFrame.mean`
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> df = ed.DataFrame('localhost', 'flights')
|
||||||
|
>>> df.mean()
|
||||||
|
AvgTicketPrice 628.253689
|
||||||
|
Cancelled 0.128494
|
||||||
|
DistanceKilometers 7092.142457
|
||||||
|
DistanceMiles 4406.853010
|
||||||
|
FlightDelay 0.251168
|
||||||
|
FlightDelayMin 47.335171
|
||||||
|
FlightTimeHour 8.518797
|
||||||
|
FlightTimeMin 511.127842
|
||||||
|
dayOfWeek 2.835975
|
||||||
|
dtype: float64
|
||||||
|
"""
|
||||||
|
if numeric_only == False:
|
||||||
|
raise NotImplementedError("Only mean of numeric fields is implemented")
|
||||||
return self._query_compiler.mean()
|
return self._query_compiler.mean()
|
||||||
|
|
||||||
def sum(self, numeric_only=True):
|
def sum(self, numeric_only=True):
|
||||||
|
"""
|
||||||
|
Return sum for each numeric column
|
||||||
|
|
||||||
|
TODO - implement remainder of pandas arguments
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
pandas.Series
|
||||||
|
sum for each numeric column
|
||||||
|
|
||||||
|
See Also
|
||||||
|
--------
|
||||||
|
:pandas_api_docs:`pandas.DataFrame.sum`
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> df = ed.DataFrame('localhost', 'flights')
|
||||||
|
>>> df.sum()
|
||||||
|
AvgTicketPrice 8.204365e+06
|
||||||
|
Cancelled 1.678000e+03
|
||||||
|
DistanceKilometers 9.261629e+07
|
||||||
|
DistanceMiles 5.754909e+07
|
||||||
|
FlightDelay 3.280000e+03
|
||||||
|
FlightDelayMin 6.181500e+05
|
||||||
|
FlightTimeHour 1.112470e+05
|
||||||
|
FlightTimeMin 6.674818e+06
|
||||||
|
dayOfWeek 3.703500e+04
|
||||||
|
dtype: float64
|
||||||
|
"""
|
||||||
if numeric_only == False:
|
if numeric_only == False:
|
||||||
raise NotImplementedError("Only sum of numeric fields is implemented")
|
raise NotImplementedError("Only sum of numeric fields is implemented")
|
||||||
return self._query_compiler.sum()
|
return self._query_compiler.sum()
|
||||||
|
|
||||||
def min(self, numeric_only=True):
|
def min(self, numeric_only=True):
|
||||||
|
"""
|
||||||
|
Return the minimum value for each numeric column
|
||||||
|
|
||||||
|
TODO - implement remainder of pandas arguments
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
pandas.Series
|
||||||
|
min value for each numeric column
|
||||||
|
|
||||||
|
See Also
|
||||||
|
--------
|
||||||
|
:pandas_api_docs:`pandas.DataFrame.min`
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> df = ed.DataFrame('localhost', 'flights')
|
||||||
|
>>> df.min()
|
||||||
|
AvgTicketPrice 100.020531
|
||||||
|
Cancelled 0.000000
|
||||||
|
DistanceKilometers 0.000000
|
||||||
|
DistanceMiles 0.000000
|
||||||
|
FlightDelay 0.000000
|
||||||
|
FlightDelayMin 0.000000
|
||||||
|
FlightTimeHour 0.000000
|
||||||
|
FlightTimeMin 0.000000
|
||||||
|
dayOfWeek 0.000000
|
||||||
|
dtype: float64
|
||||||
|
"""
|
||||||
if numeric_only == False:
|
if numeric_only == False:
|
||||||
raise NotImplementedError("Only sum of numeric fields is implemented")
|
raise NotImplementedError("Only min of numeric fields is implemented")
|
||||||
return self._query_compiler.min()
|
return self._query_compiler.min()
|
||||||
|
|
||||||
def max(self, numeric_only=True):
|
def max(self, numeric_only=True):
|
||||||
|
"""
|
||||||
|
Return the maximum value for each numeric column
|
||||||
|
|
||||||
|
TODO - implement remainder of pandas arguments
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
pandas.Series
|
||||||
|
max value for each numeric column
|
||||||
|
|
||||||
|
See Also
|
||||||
|
--------
|
||||||
|
:pandas_api_docs:`pandas.DataFrame.max`
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> df = ed.DataFrame('localhost', 'flights')
|
||||||
|
>>> df.max()
|
||||||
|
AvgTicketPrice 1199.729004
|
||||||
|
Cancelled 1.000000
|
||||||
|
DistanceKilometers 19881.482422
|
||||||
|
DistanceMiles 12353.780273
|
||||||
|
FlightDelay 1.000000
|
||||||
|
FlightDelayMin 360.000000
|
||||||
|
FlightTimeHour 31.715034
|
||||||
|
FlightTimeMin 1902.901978
|
||||||
|
dayOfWeek 6.000000
|
||||||
|
dtype: float64
|
||||||
|
"""
|
||||||
if numeric_only == False:
|
if numeric_only == False:
|
||||||
raise NotImplementedError("Only sum of numeric fields is implemented")
|
raise NotImplementedError("Only max of numeric fields is implemented")
|
||||||
return self._query_compiler.max()
|
return self._query_compiler.max()
|
||||||
|
|
||||||
def nunique(self):
|
def nunique(self):
|
||||||
|
"""
|
||||||
|
Return cardinality of each field.
|
||||||
|
|
||||||
|
**Note we can only do this for aggregatable Elasticsearch fields - (in general) numeric and keyword rather than text fields**
|
||||||
|
|
||||||
|
This method will try and field aggregatable fields if possible if mapping has::
|
||||||
|
|
||||||
|
"customer_first_name" : {
|
||||||
|
"type" : "text",
|
||||||
|
"fields" : {
|
||||||
|
"keyword" : {
|
||||||
|
"type" : "keyword",
|
||||||
|
"ignore_above" : 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
we will aggregate ``customer_first_name`` columns using ``customer_first_name.keyword``.
|
||||||
|
|
||||||
|
TODO - implement remainder of pandas arguments
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
pandas.Series
|
||||||
|
cardinality of each column
|
||||||
|
|
||||||
|
See Also
|
||||||
|
--------
|
||||||
|
:pandas_api_docs:`pandas.DataFrame.nunique`
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> columns = ['category', 'currency', 'customer_birth_date', 'customer_first_name', 'user']
|
||||||
|
>>> df = ed.DataFrame('localhost', 'ecommerce', columns=columns)
|
||||||
|
>>> df.nunique()
|
||||||
|
category 6
|
||||||
|
currency 1
|
||||||
|
customer_birth_date 0
|
||||||
|
customer_first_name 46
|
||||||
|
user 46
|
||||||
|
dtype: int64
|
||||||
|
"""
|
||||||
return self._query_compiler.nunique()
|
return self._query_compiler.nunique()
|
||||||
|
|
||||||
def _hist(self, num_bins):
|
def _hist(self, num_bins):
|
||||||
@ -341,7 +484,7 @@ class NDFrame:
|
|||||||
|
|
||||||
See Also
|
See Also
|
||||||
--------
|
--------
|
||||||
:pandas_docs:`pandas.DataFrame.describe`
|
:pandas_api_docs:`pandas.DataFrame.describe`
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
|
@ -183,12 +183,13 @@ class Operations:
|
|||||||
raise NotImplementedError("Can not count field matches if size is set {}".format(size))
|
raise NotImplementedError("Can not count field matches if size is set {}".format(size))
|
||||||
|
|
||||||
columns = self.get_columns()
|
columns = self.get_columns()
|
||||||
if columns is None:
|
|
||||||
columns = query_compiler._mappings.source_fields()
|
# Get just aggregatable columns
|
||||||
|
aggregatable_columns = query_compiler._mappings.aggregatable_columns(columns)
|
||||||
|
|
||||||
body = Query(query_params['query'])
|
body = Query(query_params['query'])
|
||||||
|
|
||||||
for field in columns:
|
for field in aggregatable_columns.keys():
|
||||||
body.metric_aggs(field, func, field)
|
body.metric_aggs(field, func, field)
|
||||||
|
|
||||||
response = query_compiler._client.search(
|
response = query_compiler._client.search(
|
||||||
@ -198,10 +199,10 @@ class Operations:
|
|||||||
|
|
||||||
results = {}
|
results = {}
|
||||||
|
|
||||||
for field in columns:
|
for key, value in aggregatable_columns.items():
|
||||||
results[field] = response['aggregations'][field]['value']
|
results[value] = response['aggregations'][key]['value']
|
||||||
|
|
||||||
s = pd.Series(data=results, index=columns)
|
s = pd.Series(data=results, index=results.keys())
|
||||||
|
|
||||||
return s
|
return s
|
||||||
|
|
||||||
@ -845,16 +846,16 @@ class Operations:
|
|||||||
|
|
||||||
def info_es(self, buf):
|
def info_es(self, buf):
|
||||||
buf.write("Operations:\n")
|
buf.write("Operations:\n")
|
||||||
buf.write("\ttasks: {0}\n".format(self._tasks))
|
buf.write(" tasks: {0}\n".format(self._tasks))
|
||||||
|
|
||||||
query_params, post_processing = self._resolve_tasks()
|
query_params, post_processing = self._resolve_tasks()
|
||||||
size, sort_params = Operations._query_params_to_size_and_sort(query_params)
|
size, sort_params = Operations._query_params_to_size_and_sort(query_params)
|
||||||
columns = self.get_columns()
|
columns = self.get_columns()
|
||||||
|
|
||||||
buf.write("\tsize: {0}\n".format(size))
|
buf.write(" size: {0}\n".format(size))
|
||||||
buf.write("\tsort_params: {0}\n".format(sort_params))
|
buf.write(" sort_params: {0}\n".format(sort_params))
|
||||||
buf.write("\tcolumns: {0}\n".format(columns))
|
buf.write(" columns: {0}\n".format(columns))
|
||||||
buf.write("\tpost_processing: {0}\n".format(post_processing))
|
buf.write(" post_processing: {0}\n".format(post_processing))
|
||||||
|
|
||||||
def update_query(self, boolean_filter):
|
def update_query(self, boolean_filter):
|
||||||
task = ('boolean_filter', boolean_filter)
|
task = ('boolean_filter', boolean_filter)
|
||||||
|
@ -10,7 +10,7 @@ def ed_hist_frame(ed_df, column=None, by=None, grid=True, xlabelsize=None,
|
|||||||
xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False,
|
xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False,
|
||||||
sharey=False, figsize=None, layout=None, bins=10, **kwds):
|
sharey=False, figsize=None, layout=None, bins=10, **kwds):
|
||||||
"""
|
"""
|
||||||
See :pandas_docs:`pandas.DataFrame.hist` for usage.
|
See :pandas_api_docs:`pandas.DataFrame.hist` for usage.
|
||||||
|
|
||||||
Notes
|
Notes
|
||||||
-----
|
-----
|
||||||
|
@ -215,3 +215,16 @@ class Series(NDFrame):
|
|||||||
return NotFilter(Equal(field=self.name, value=other))
|
return NotFilter(Equal(field=self.name, value=other))
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError(other, type(other))
|
raise NotImplementedError(other, type(other))
|
||||||
|
|
||||||
|
@property
|
||||||
|
def ndim(self):
|
||||||
|
"""
|
||||||
|
Returns 1 by definition of a Series1
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
int
|
||||||
|
By definition 1
|
||||||
|
|
||||||
|
"""
|
||||||
|
return 1
|
||||||
|
26
eland/tests/dataframe/test_keys_pytest.py
Normal file
26
eland/tests/dataframe/test_keys_pytest.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
# File called _pytest for PyCharm compatability
|
||||||
|
|
||||||
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
|
from pandas.testing import assert_index_equal
|
||||||
|
|
||||||
|
|
||||||
|
class TestDataFrameKeys(TestData):
|
||||||
|
|
||||||
|
def test_ecommerce_keys(self):
|
||||||
|
pd_ecommerce = self.pd_ecommerce()
|
||||||
|
ed_ecommerce = self.ed_ecommerce()
|
||||||
|
|
||||||
|
pd_keys = pd_ecommerce.keys()
|
||||||
|
ed_keys = ed_ecommerce.keys()
|
||||||
|
|
||||||
|
assert_index_equal(pd_keys, ed_keys)
|
||||||
|
|
||||||
|
def test_flights_keys(self):
|
||||||
|
pd_flights = self.pd_flights()
|
||||||
|
ed_flights = self.ed_flights()
|
||||||
|
|
||||||
|
pd_keys = pd_flights.keys()
|
||||||
|
ed_keys = ed_flights.keys()
|
||||||
|
|
||||||
|
assert_index_equal(pd_keys, ed_keys)
|
@ -7,16 +7,16 @@ from eland.tests.common import TestData
|
|||||||
|
|
||||||
class TestDataFrameMetrics(TestData):
|
class TestDataFrameMetrics(TestData):
|
||||||
|
|
||||||
def test_to_mean(self):
|
def test_mean(self):
|
||||||
pd_flights = self.pd_flights()
|
pd_flights = self.pd_flights()
|
||||||
ed_flights = self.ed_flights()
|
ed_flights = self.ed_flights()
|
||||||
|
|
||||||
pd_mean = pd_flights.mean()
|
pd_mean = pd_flights.mean(numeric_only=True)
|
||||||
ed_mean = ed_flights.mean()
|
ed_mean = ed_flights.mean(numeric_only=True)
|
||||||
|
|
||||||
assert_series_equal(pd_mean, ed_mean)
|
assert_series_equal(pd_mean, ed_mean)
|
||||||
|
|
||||||
def test_to_sum(self):
|
def test_sum(self):
|
||||||
pd_flights = self.pd_flights()
|
pd_flights = self.pd_flights()
|
||||||
ed_flights = self.ed_flights()
|
ed_flights = self.ed_flights()
|
||||||
|
|
||||||
@ -25,7 +25,7 @@ class TestDataFrameMetrics(TestData):
|
|||||||
|
|
||||||
assert_series_equal(pd_sum, ed_sum)
|
assert_series_equal(pd_sum, ed_sum)
|
||||||
|
|
||||||
def test_to_min(self):
|
def test_min(self):
|
||||||
pd_flights = self.pd_flights()
|
pd_flights = self.pd_flights()
|
||||||
ed_flights = self.ed_flights()
|
ed_flights = self.ed_flights()
|
||||||
|
|
||||||
@ -34,7 +34,7 @@ class TestDataFrameMetrics(TestData):
|
|||||||
|
|
||||||
assert_series_equal(pd_min, ed_min)
|
assert_series_equal(pd_min, ed_min)
|
||||||
|
|
||||||
def test_to_max(self):
|
def test_max(self):
|
||||||
pd_flights = self.pd_flights()
|
pd_flights = self.pd_flights()
|
||||||
ed_flights = self.ed_flights()
|
ed_flights = self.ed_flights()
|
||||||
|
|
||||||
|
33
eland/tests/dataframe/test_nunique_pytest.py
Normal file
33
eland/tests/dataframe/test_nunique_pytest.py
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
# File called _pytest for PyCharm compatability
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from pandas.util.testing import assert_series_equal
|
||||||
|
|
||||||
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
|
|
||||||
|
class TestDataFrameNUnique(TestData):
|
||||||
|
|
||||||
|
def test_flights_nunique(self):
|
||||||
|
# Note pandas.nunique fails for dict columns (e.g. DestLocation)
|
||||||
|
columns = ['AvgTicketPrice', 'Cancelled', 'Carrier', 'Dest', 'DestAirportID', 'DestCityName']
|
||||||
|
pd_flights = self.pd_flights()[columns]
|
||||||
|
ed_flights = self.ed_flights()[columns]
|
||||||
|
|
||||||
|
pd_nunique = pd_flights.nunique()
|
||||||
|
ed_nunique = ed_flights.nunique()
|
||||||
|
|
||||||
|
# TODO - ES is approximate counts so these aren't equal...
|
||||||
|
#E[left]: [13059, 2, 4, 156, 156, 143]
|
||||||
|
#E[right]: [13132, 2, 4, 156, 156, 143]
|
||||||
|
#assert_series_equal(pd_nunique, ed_nunique)
|
||||||
|
|
||||||
|
def test_ecommerce_nunique(self):
|
||||||
|
columns = ['customer_first_name', 'customer_gender', 'day_of_week_i']
|
||||||
|
pd_ecommerce = self.pd_ecommerce()[columns]
|
||||||
|
ed_ecommerce = self.ed_ecommerce()[columns]
|
||||||
|
|
||||||
|
pd_nunique = pd_ecommerce.nunique()
|
||||||
|
ed_nunique = ed_ecommerce.nunique()
|
||||||
|
|
||||||
|
assert_series_equal(pd_nunique, ed_nunique)
|
@ -10,14 +10,14 @@ from eland.tests.common import assert_pandas_eland_frame_equal
|
|||||||
|
|
||||||
class TestDataFrameQuery(TestData):
|
class TestDataFrameQuery(TestData):
|
||||||
|
|
||||||
def test_query(self):
|
def test_getitem_query(self):
|
||||||
# Examples from:
|
# Examples from:
|
||||||
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.query.html
|
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.query.html
|
||||||
pd_df = pd.DataFrame({'A': range(1, 6), 'B': range(10, 0, -2), 'C': range(10, 5, -1)},
|
pd_df = pd.DataFrame({'A': range(1, 6), 'B': range(10, 0, -2), 'C': range(10, 5, -1)},
|
||||||
index=['0', '1', '2', '3', '4'])
|
index=['0', '1', '2', '3', '4'])
|
||||||
|
|
||||||
# Now create index
|
# Now create index
|
||||||
index_name = 'eland_test_query1'
|
index_name = 'eland_test_query'
|
||||||
|
|
||||||
ed_df = ed.pd_to_ed(pd_df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)
|
ed_df = ed.pd_to_ed(pd_df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)
|
||||||
|
|
||||||
@ -42,3 +42,12 @@ class TestDataFrameQuery(TestData):
|
|||||||
ed_q4 = ed_df[(ed_df.A > 2) & (ed_df.B > 3)]
|
ed_q4 = ed_df[(ed_df.A > 2) & (ed_df.B > 3)]
|
||||||
|
|
||||||
assert_pandas_eland_frame_equal(pd_q4, ed_q4)
|
assert_pandas_eland_frame_equal(pd_q4, ed_q4)
|
||||||
|
|
||||||
|
def test_query(self):
|
||||||
|
ed_flights = self.ed_flights()
|
||||||
|
pd_flights = self.pd_flights()
|
||||||
|
|
||||||
|
#print(ed_flights.query('FlightDelayMin > 60').info_es())
|
||||||
|
|
||||||
|
print(pd_flights.query('FlightDelayMin > 60').shape)
|
||||||
|
print(ed_flights.query('FlightDelayMin > 60').shape)
|
||||||
|
@ -5,7 +5,7 @@ from eland.tests.common import TestData
|
|||||||
|
|
||||||
class TestDataFrameShape(TestData):
|
class TestDataFrameShape(TestData):
|
||||||
|
|
||||||
def test_to_shape1(self):
|
def test_ecommerce_shape(self):
|
||||||
pd_ecommerce = self.pd_ecommerce()
|
pd_ecommerce = self.pd_ecommerce()
|
||||||
ed_ecommerce = self.ed_ecommerce()
|
ed_ecommerce = self.ed_ecommerce()
|
||||||
|
|
||||||
@ -14,7 +14,7 @@ class TestDataFrameShape(TestData):
|
|||||||
|
|
||||||
assert pd_shape == ed_shape
|
assert pd_shape == ed_shape
|
||||||
|
|
||||||
def test_to_shape2(self):
|
def test_flights_shape(self):
|
||||||
pd_flights = self.pd_flights()
|
pd_flights = self.pd_flights()
|
||||||
ed_flights = self.ed_flights()
|
ed_flights = self.ed_flights()
|
||||||
|
|
||||||
|
72
eland/tests/mappings/test_aggregatables_pytest.py
Normal file
72
eland/tests/mappings/test_aggregatables_pytest.py
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
# File called _pytest for PyCharm compatability
|
||||||
|
|
||||||
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
|
|
||||||
|
class TestMappingsAggregatables(TestData):
|
||||||
|
|
||||||
|
def test_ecommerce_all_aggregatables(self):
|
||||||
|
ed_ecommerce = self.ed_ecommerce()
|
||||||
|
|
||||||
|
aggregatables = ed_ecommerce._query_compiler._mappings.aggregatable_columns()
|
||||||
|
|
||||||
|
expected = {'category.keyword': 'category',
|
||||||
|
'currency': 'currency',
|
||||||
|
'customer_birth_date': 'customer_birth_date',
|
||||||
|
'customer_first_name.keyword': 'customer_first_name',
|
||||||
|
'customer_full_name.keyword': 'customer_full_name',
|
||||||
|
'customer_gender': 'customer_gender',
|
||||||
|
'customer_id': 'customer_id',
|
||||||
|
'customer_last_name.keyword': 'customer_last_name',
|
||||||
|
'customer_phone': 'customer_phone',
|
||||||
|
'day_of_week': 'day_of_week',
|
||||||
|
'day_of_week_i': 'day_of_week_i',
|
||||||
|
'email': 'email',
|
||||||
|
'geoip.city_name': 'geoip.city_name',
|
||||||
|
'geoip.continent_name': 'geoip.continent_name',
|
||||||
|
'geoip.country_iso_code': 'geoip.country_iso_code',
|
||||||
|
'geoip.location': 'geoip.location',
|
||||||
|
'geoip.region_name': 'geoip.region_name',
|
||||||
|
'manufacturer.keyword': 'manufacturer',
|
||||||
|
'order_date': 'order_date',
|
||||||
|
'order_id': 'order_id',
|
||||||
|
'products._id.keyword': 'products._id',
|
||||||
|
'products.base_price': 'products.base_price',
|
||||||
|
'products.base_unit_price': 'products.base_unit_price',
|
||||||
|
'products.category.keyword': 'products.category',
|
||||||
|
'products.created_on': 'products.created_on',
|
||||||
|
'products.discount_amount': 'products.discount_amount',
|
||||||
|
'products.discount_percentage': 'products.discount_percentage',
|
||||||
|
'products.manufacturer.keyword': 'products.manufacturer',
|
||||||
|
'products.min_price': 'products.min_price',
|
||||||
|
'products.price': 'products.price',
|
||||||
|
'products.product_id': 'products.product_id',
|
||||||
|
'products.product_name.keyword': 'products.product_name',
|
||||||
|
'products.quantity': 'products.quantity',
|
||||||
|
'products.sku': 'products.sku',
|
||||||
|
'products.tax_amount': 'products.tax_amount',
|
||||||
|
'products.taxful_price': 'products.taxful_price',
|
||||||
|
'products.taxless_price': 'products.taxless_price',
|
||||||
|
'products.unit_discount_amount': 'products.unit_discount_amount',
|
||||||
|
'sku': 'sku',
|
||||||
|
'taxful_total_price': 'taxful_total_price',
|
||||||
|
'taxless_total_price': 'taxless_total_price',
|
||||||
|
'total_quantity': 'total_quantity',
|
||||||
|
'total_unique_products': 'total_unique_products',
|
||||||
|
'type': 'type',
|
||||||
|
'user': 'user'}
|
||||||
|
|
||||||
|
assert expected == aggregatables
|
||||||
|
|
||||||
|
def test_ecommerce_selected_aggregatables(self):
|
||||||
|
ed_ecommerce = self.ed_ecommerce()
|
||||||
|
|
||||||
|
expected = {'category.keyword': 'category',
|
||||||
|
'currency': 'currency',
|
||||||
|
'customer_birth_date': 'customer_birth_date',
|
||||||
|
'customer_first_name.keyword': 'customer_first_name',
|
||||||
|
'type': 'type', 'user': 'user'}
|
||||||
|
|
||||||
|
aggregatables = ed_ecommerce._query_compiler._mappings.aggregatable_columns(expected.values())
|
||||||
|
|
||||||
|
assert expected == aggregatables
|
@ -2,5 +2,4 @@ elasticsearch>=7.0.5
|
|||||||
pandas==0.25.1
|
pandas==0.25.1
|
||||||
matplotlib
|
matplotlib
|
||||||
pytest>=5.2.1
|
pytest>=5.2.1
|
||||||
sphinx_rtd_theme
|
|
||||||
numpydoc==0.8
|
numpydoc==0.8
|
||||||
|
Loading…
x
Reference in New Issue
Block a user