mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Added Series metric aggs + Series docs
Also, improved Series.to_string()
This commit is contained in:
parent
5d119215f8
commit
84e23ab5d1
6
docs/source/reference/api/eland.Series.add.rst
Normal file
6
docs/source/reference/api/eland.Series.add.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.add
|
||||
================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: Series.add
|
6
docs/source/reference/api/eland.Series.describe.rst
Normal file
6
docs/source/reference/api/eland.Series.describe.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.describe
|
||||
=====================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: Series.describe
|
6
docs/source/reference/api/eland.Series.div.rst
Normal file
6
docs/source/reference/api/eland.Series.div.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.div
|
||||
================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: Series.div
|
6
docs/source/reference/api/eland.Series.empty.rst
Normal file
6
docs/source/reference/api/eland.Series.empty.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.empty
|
||||
==================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. autoattribute:: Series.empty
|
6
docs/source/reference/api/eland.Series.floordiv.rst
Normal file
6
docs/source/reference/api/eland.Series.floordiv.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.floordiv
|
||||
=====================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: Series.floordiv
|
6
docs/source/reference/api/eland.Series.head.rst
Normal file
6
docs/source/reference/api/eland.Series.head.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.head
|
||||
=================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: Series.head
|
6
docs/source/reference/api/eland.Series.index.rst
Normal file
6
docs/source/reference/api/eland.Series.index.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.index
|
||||
==================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. autoattribute:: Series.index
|
6
docs/source/reference/api/eland.Series.max.rst
Normal file
6
docs/source/reference/api/eland.Series.max.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.max
|
||||
================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: Series.max
|
6
docs/source/reference/api/eland.Series.mean.rst
Normal file
6
docs/source/reference/api/eland.Series.mean.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.mean
|
||||
=================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: Series.mean
|
6
docs/source/reference/api/eland.Series.min.rst
Normal file
6
docs/source/reference/api/eland.Series.min.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.min
|
||||
================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: Series.min
|
6
docs/source/reference/api/eland.Series.mod.rst
Normal file
6
docs/source/reference/api/eland.Series.mod.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.mod
|
||||
================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: Series.mod
|
6
docs/source/reference/api/eland.Series.mul.rst
Normal file
6
docs/source/reference/api/eland.Series.mul.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.mul
|
||||
================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: Series.mul
|
6
docs/source/reference/api/eland.Series.name.rst
Normal file
6
docs/source/reference/api/eland.Series.name.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.name
|
||||
=================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. autoattribute:: Series.name
|
6
docs/source/reference/api/eland.Series.nunique.rst
Normal file
6
docs/source/reference/api/eland.Series.nunique.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.nunique
|
||||
====================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: Series.nunique
|
6
docs/source/reference/api/eland.Series.pow.rst
Normal file
6
docs/source/reference/api/eland.Series.pow.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.pow
|
||||
================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: Series.pow
|
6
docs/source/reference/api/eland.Series.rename.rst
Normal file
6
docs/source/reference/api/eland.Series.rename.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.rename
|
||||
===================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: Series.rename
|
6
docs/source/reference/api/eland.Series.rst
Normal file
6
docs/source/reference/api/eland.Series.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series
|
||||
============
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. autoclass:: Series
|
6
docs/source/reference/api/eland.Series.shape.rst
Normal file
6
docs/source/reference/api/eland.Series.shape.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.shape
|
||||
==================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. autoattribute:: Series.shape
|
6
docs/source/reference/api/eland.Series.sub.rst
Normal file
6
docs/source/reference/api/eland.Series.sub.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.sub
|
||||
================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: Series.sub
|
6
docs/source/reference/api/eland.Series.sum.rst
Normal file
6
docs/source/reference/api/eland.Series.sum.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.sum
|
||||
================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: Series.sum
|
6
docs/source/reference/api/eland.Series.tail.rst
Normal file
6
docs/source/reference/api/eland.Series.tail.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.tail
|
||||
=================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: Series.tail
|
6
docs/source/reference/api/eland.Series.to_string.rst
Normal file
6
docs/source/reference/api/eland.Series.to_string.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.to_string
|
||||
======================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: Series.to_string
|
6
docs/source/reference/api/eland.Series.truediv.rst
Normal file
6
docs/source/reference/api/eland.Series.truediv.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.truediv
|
||||
====================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: Series.truediv
|
@ -1,5 +1,5 @@
|
||||
eland.Series.value_counts
|
||||
===========================
|
||||
=========================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
|
@ -91,5 +91,3 @@ Elasticsearch utilities
|
||||
:toctree: api/
|
||||
|
||||
DataFrame.info_es
|
||||
|
||||
|
||||
|
@ -5,9 +5,77 @@ Series
|
||||
=========
|
||||
.. currentmodule:: eland
|
||||
|
||||
Constructor
|
||||
~~~~~~~~~~~
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
|
||||
Series
|
||||
|
||||
Attributes and underlying data
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
**Axes**
|
||||
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
|
||||
Series.index
|
||||
Series.shape
|
||||
Series.name
|
||||
Series.empty
|
||||
|
||||
Indexing, iteration
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
|
||||
Series.head
|
||||
Series.tail
|
||||
|
||||
Binary operator functions
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
|
||||
Series.add
|
||||
Series.sub
|
||||
Series.mul
|
||||
Series.div
|
||||
Series.truediv
|
||||
Series.floordiv
|
||||
Series.mod
|
||||
Series.pow
|
||||
|
||||
Computations / descriptive stats
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
|
||||
Series.describe
|
||||
Series.max
|
||||
Series.mean
|
||||
Series.min
|
||||
Series.sum
|
||||
Series.nunique
|
||||
Series.value_counts
|
||||
|
||||
Reindexing / selection / label manipulation
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
|
||||
Series.rename
|
||||
|
||||
Serialization / IO / conversion
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
|
||||
Series.to_string
|
||||
|
||||
Elasticsearch utilities
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
|
||||
Series.info_es
|
||||
|
@ -1,5 +1,6 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from eland.common import *
|
||||
from eland.client import *
|
||||
from eland.filter import *
|
||||
from eland.index import *
|
||||
|
8
eland/common.py
Normal file
8
eland/common.py
Normal file
@ -0,0 +1,8 @@
|
||||
# Default number of rows displayed (different to pandas where ALL could be displayed)
|
||||
DEFAULT_NUM_ROWS_DISPLAYED = 60
|
||||
|
||||
def docstring_parameter(*sub):
|
||||
def dec(obj):
|
||||
obj.__doc__ = obj.__doc__.format(*sub)
|
||||
return obj
|
||||
return dec
|
@ -18,15 +18,7 @@ import eland.plotting as gfx
|
||||
from eland import NDFrame
|
||||
from eland import Series
|
||||
from eland.filter import BooleanFilter, ScriptFilter
|
||||
|
||||
# Default number of rows displayed (different to pandas where ALL could be displayed)
|
||||
DEFAULT_NUM_ROWS_DISPLAYED = 60
|
||||
|
||||
def docstring_parameter(*sub):
|
||||
def dec(obj):
|
||||
obj.__doc__ = obj.__doc__.format(*sub)
|
||||
return obj
|
||||
return dec
|
||||
from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, docstring_parameter
|
||||
|
||||
|
||||
class DataFrame(NDFrame):
|
||||
@ -43,7 +35,7 @@ class DataFrame(NDFrame):
|
||||
- elasticsearch-py instance or
|
||||
- eland.Client instance
|
||||
index_pattern: str
|
||||
Elasticsearch index pattern (e.g. 'flights' or 'filebeat-*')
|
||||
Elasticsearch index pattern (e.g. 'flights' or 'filebeat-\*')
|
||||
columns: list of str, optional
|
||||
List of DataFrame columns. A subset of the Elasticsearch index's fields.
|
||||
index_field: str, optional
|
||||
@ -98,7 +90,6 @@ class DataFrame(NDFrame):
|
||||
<BLANKLINE>
|
||||
[5 rows x 2 columns]
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
client=None,
|
||||
index_pattern=None,
|
||||
@ -586,7 +577,7 @@ class DataFrame(NDFrame):
|
||||
max_rows = 1
|
||||
|
||||
# Create a slightly bigger dataframe than display
|
||||
df = self._build_repr_df(max_rows + 1, max_cols)
|
||||
df = self._build_repr(max_rows + 1)
|
||||
|
||||
if buf is not None:
|
||||
_buf = _expand_user(_stringify_path(buf))
|
||||
@ -651,7 +642,7 @@ class DataFrame(NDFrame):
|
||||
max_rows = 1
|
||||
|
||||
# Create a slightly bigger dataframe than display
|
||||
df = self._build_repr_df(max_rows + 1, max_cols)
|
||||
df = self._build_repr(max_rows + 1)
|
||||
|
||||
if buf is not None:
|
||||
_buf = _expand_user(_stringify_path(buf))
|
||||
@ -1064,3 +1055,48 @@ class DataFrame(NDFrame):
|
||||
return self._getitem(key)
|
||||
else:
|
||||
return default
|
||||
|
||||
@property
|
||||
def values(self):
|
||||
"""
|
||||
Not implemented.
|
||||
|
||||
In pandas this returns a Numpy representation of the DataFrame. This would involve scan/scrolling the
|
||||
entire index.
|
||||
|
||||
If this is required, call ``ed.eland_to_pandas(ed_df).values``, *but beware this will scan/scroll the entire
|
||||
Elasticsearch index(s) into memory*
|
||||
|
||||
See Also
|
||||
--------
|
||||
:pandas_api_docs:`pandas.DataFrame.values`
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> ed_df = ed.DataFrame('localhost', 'flights', columns=['AvgTicketPrice', 'Carrier']).head(5)
|
||||
>>> pd_df = ed.eland_to_pandas(ed_df)
|
||||
>>> print("type(ed_df)={0}\\ntype(pd_df)={1}".format(type(ed_df), type(pd_df)))
|
||||
type(ed_df)=<class 'eland.dataframe.DataFrame'>
|
||||
type(pd_df)=<class 'pandas.core.frame.DataFrame'>
|
||||
>>> ed_df
|
||||
AvgTicketPrice Carrier
|
||||
0 841.265642 Kibana Airlines
|
||||
1 882.982662 Logstash Airways
|
||||
2 190.636904 Logstash Airways
|
||||
3 181.694216 Kibana Airlines
|
||||
4 730.041778 Kibana Airlines
|
||||
<BLANKLINE>
|
||||
[5 rows x 2 columns]
|
||||
>>> pd_df.values
|
||||
array([[841.2656419677076, 'Kibana Airlines'],
|
||||
[882.9826615595518, 'Logstash Airways'],
|
||||
[190.6369038508356, 'Logstash Airways'],
|
||||
[181.69421554118, 'Kibana Airlines'],
|
||||
[730.041778346198, 'Kibana Airlines']], dtype=object)
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"This method would scan/scroll the entire Elasticsearch index(s) into memory."
|
||||
"If this is explicitly required and there is sufficient memory, call `ed.eland_to_pandas(ed_df).values`"
|
||||
)
|
||||
|
||||
to_numpy = values
|
||||
|
@ -31,7 +31,6 @@ from pandas.util._validators import validate_bool_kwarg
|
||||
|
||||
from eland import ElandQueryCompiler
|
||||
|
||||
|
||||
class NDFrame:
|
||||
|
||||
def __init__(self,
|
||||
@ -65,6 +64,7 @@ class NDFrame:
|
||||
See Also
|
||||
--------
|
||||
:pandas_api_docs:`pandas.DataFrame.index`
|
||||
:pandas_api_docs:`pandas.Series.index`
|
||||
|
||||
Examples
|
||||
--------
|
||||
@ -72,6 +72,10 @@ class NDFrame:
|
||||
>>> assert isinstance(df.index, ed.Index)
|
||||
>>> df.index.index_field
|
||||
'_id'
|
||||
>>> s = df['Carrier']
|
||||
>>> assert isinstance(s.index, ed.Index)
|
||||
>>> s.index.index_field
|
||||
'_id'
|
||||
"""
|
||||
return self._query_compiler.index
|
||||
|
||||
@ -104,9 +108,8 @@ class NDFrame:
|
||||
"""
|
||||
return self._query_compiler.dtypes
|
||||
|
||||
def _build_repr_df(self, num_rows, num_cols):
|
||||
# Overridden version of BasePandasDataset._build_repr_df
|
||||
# to avoid issues with concat
|
||||
def _build_repr(self, num_rows):
|
||||
# self could be Series or DataFrame
|
||||
if len(self.index) <= num_rows:
|
||||
return self._to_pandas()
|
||||
|
||||
|
@ -588,6 +588,7 @@ class Operations:
|
||||
df = self._apply_df_post_processing(df, post_processing)
|
||||
collector.collect(df)
|
||||
|
||||
|
||||
def iloc(self, index, field_names):
|
||||
# index and field_names are indexers
|
||||
task = ('iloc', (index, field_names))
|
||||
@ -881,9 +882,10 @@ class Operations:
|
||||
left_field = item[1][1][1][0]
|
||||
right_field = item[1][1][1][1]
|
||||
|
||||
# https://www.elastic.co/guide/en/elasticsearch/painless/current/painless-api-reference-shared-java-lang.html#painless-api-reference-shared-Math
|
||||
if isinstance(right_field, str):
|
||||
"""
|
||||
(if op_name = 'truediv')
|
||||
(if op_name = '__truediv__')
|
||||
|
||||
"script_fields": {
|
||||
"field_name": {
|
||||
@ -893,12 +895,23 @@ class Operations:
|
||||
}
|
||||
}
|
||||
"""
|
||||
if op_name == 'truediv':
|
||||
op = '/'
|
||||
if op_name == '__add__':
|
||||
source = "doc['{0}'].value + doc['{1}'].value".format(left_field, right_field)
|
||||
elif op_name == '__truediv__':
|
||||
source = "doc['{0}'].value / doc['{1}'].value".format(left_field, right_field)
|
||||
elif op_name == '__floordiv__':
|
||||
source = "Math.floor(doc['{0}'].value / doc['{1}'].value)".format(left_field, right_field)
|
||||
elif op_name == '__pow__':
|
||||
source = "Math.pow(doc['{0}'].value, doc['{1}'].value)".format(left_field, right_field)
|
||||
elif op_name == '__mod__':
|
||||
source = "doc['{0}'].value % doc['{1}'].value".format(left_field, right_field)
|
||||
elif op_name == '__mul__':
|
||||
source = "doc['{0}'].value * doc['{1}'].value".format(left_field, right_field)
|
||||
elif op_name == '__sub__':
|
||||
source = "doc['{0}'].value - doc['{1}'].value".format(left_field, right_field)
|
||||
else:
|
||||
raise NotImplementedError("Not implemented operation '{0}'".format(op_name))
|
||||
|
||||
source = "doc['{0}'].value {1} doc['{2}'].value".format(left_field, op, right_field)
|
||||
|
||||
if query_params['query_script_fields'] is None:
|
||||
query_params['query_script_fields'] = {}
|
||||
@ -909,7 +922,7 @@ class Operations:
|
||||
}
|
||||
else:
|
||||
"""
|
||||
(if op_name = 'truediv')
|
||||
(if op_name = '__truediv__')
|
||||
|
||||
"script_fields": {
|
||||
"field_name": {
|
||||
@ -919,12 +932,23 @@ class Operations:
|
||||
}
|
||||
}
|
||||
"""
|
||||
if op_name == 'truediv':
|
||||
op = '/'
|
||||
if op_name == '__add__':
|
||||
source = "doc['{0}'].value + {1}".format(left_field, right_field)
|
||||
elif op_name == '__truediv__':
|
||||
source = "doc['{0}'].value / {1}".format(left_field, right_field)
|
||||
elif op_name == '__floordiv__':
|
||||
source = "Math.floor(doc['{0}'].value / {1})".format(left_field, right_field)
|
||||
elif op_name == '__pow__':
|
||||
source = "Math.pow(doc['{0}'].value, {1})".format(left_field, right_field)
|
||||
elif op_name == '__mod__':
|
||||
source = "doc['{0}'].value % {1}".format(left_field, right_field)
|
||||
elif op_name == '__mul__':
|
||||
source = "doc['{0}'].value * {1}".format(left_field, right_field)
|
||||
elif op_name == '__sub__':
|
||||
source = "doc['{0}'].value - {1}".format(left_field, right_field)
|
||||
else:
|
||||
raise NotImplementedError("Not implemented operation '{0}'".format(op_name))
|
||||
|
||||
source = "doc['{0}'].value {1} {2}".format(left_field, op, right_field)
|
||||
|
||||
if query_params['query_script_fields'] is None:
|
||||
query_params['query_script_fields'] = {}
|
||||
|
@ -239,9 +239,9 @@ class ElandQueryCompiler:
|
||||
# Create pandas DataFrame
|
||||
df = pd.DataFrame(data=rows, index=index)
|
||||
|
||||
# _source may not contain all columns in the mapping
|
||||
# therefore, fill in missing columns
|
||||
# (note this returns self.columns NOT IN df.columns)
|
||||
# _source may not contain all field_names in the mapping
|
||||
# therefore, fill in missing field_names
|
||||
# (note this returns self.field_names NOT IN df.columns)
|
||||
missing_field_names = list(set(self.field_names) - set(df.columns))
|
||||
|
||||
for missing in missing_field_names:
|
||||
|
620
eland/series.py
620
eland/series.py
@ -11,19 +11,26 @@ without storing the dataset in local memory.
|
||||
|
||||
Implementation Details
|
||||
----------------------
|
||||
Based on NDFrame which underpins eland.1DataFrame
|
||||
Based on NDFrame which underpins eland.DataFrame
|
||||
|
||||
"""
|
||||
|
||||
import sys
|
||||
import warnings
|
||||
from io import StringIO
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from pandas.io.common import _expand_user, _stringify_path
|
||||
|
||||
from eland import NDFrame
|
||||
from eland.common import DEFAULT_NUM_ROWS_DISPLAYED
|
||||
from eland.filter import NotFilter, Equal, Greater, Less, GreaterEqual, LessEqual, ScriptFilter, IsIn
|
||||
|
||||
|
||||
def _get_method_name():
|
||||
return sys._getframe(1).f_code.co_name
|
||||
|
||||
|
||||
class Series(NDFrame):
|
||||
"""
|
||||
pandas.Series like API that proxies into Elasticsearch index(es).
|
||||
@ -34,35 +41,35 @@ class Series(NDFrame):
|
||||
A reference to an Elasticsearch python client
|
||||
|
||||
index_pattern : str
|
||||
An Elasticsearch index pattern. This can contain wildcards (e.g. filebeat-*).
|
||||
An Elasticsearch index pattern. This can contain wildcards (e.g. filebeat-\*).
|
||||
|
||||
index_field : str
|
||||
The field to base the series on
|
||||
|
||||
See Also
|
||||
--------
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
import eland as ed
|
||||
client = ed.Client(Elasticsearch())
|
||||
s = ed.DataFrame(client, 'reviews', 'date')
|
||||
df.head()
|
||||
reviewerId vendorId rating date
|
||||
0 0 0 5 2006-04-07 17:08
|
||||
1 1 1 5 2006-05-04 12:16
|
||||
2 2 2 4 2006-04-21 12:26
|
||||
3 3 3 5 2006-04-18 15:48
|
||||
4 3 4 5 2006-04-18 15:49
|
||||
|
||||
Notice that the types are based on Elasticsearch mappings
|
||||
|
||||
Notes
|
||||
-----
|
||||
If the Elasticsearch index is deleted or index mappings are changed after this
|
||||
object is created, the object is not rebuilt and so inconsistencies can occur.
|
||||
|
||||
See Also
|
||||
--------
|
||||
:pandas_api_docs:`pandas.Series`
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> ed.Series(client='localhost', index_pattern='flights', name='Carrier')
|
||||
0 Kibana Airlines
|
||||
1 Logstash Airways
|
||||
2 Logstash Airways
|
||||
3 Kibana Airlines
|
||||
4 Kibana Airlines
|
||||
...
|
||||
13054 Logstash Airways
|
||||
13055 Logstash Airways
|
||||
13056 Logstash Airways
|
||||
13057 JetBeats
|
||||
13058 JetBeats
|
||||
Name: Carrier, Length: 13059, dtype: object
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
@ -94,6 +101,34 @@ class Series(NDFrame):
|
||||
"""
|
||||
return len(self.index) == 0
|
||||
|
||||
@property
|
||||
def shape(self):
|
||||
"""
|
||||
Return a tuple representing the dimensionality of the Series.
|
||||
|
||||
Returns
|
||||
-------
|
||||
shape: tuple
|
||||
|
||||
0. number of rows
|
||||
1. number of columns
|
||||
|
||||
Notes
|
||||
-----
|
||||
- number of rows ``len(series)`` queries Elasticsearch
|
||||
- number of columns == 1
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> df = ed.Series('localhost', 'ecommerce', name='total_quantity')
|
||||
>>> df.shape
|
||||
(4675, 1)
|
||||
"""
|
||||
num_rows = len(self)
|
||||
num_columns = 1
|
||||
|
||||
return num_rows, num_columns
|
||||
|
||||
def _get_name(self):
|
||||
return self._query_compiler.columns[0]
|
||||
|
||||
@ -118,7 +153,7 @@ class Series(NDFrame):
|
||||
|
||||
See Also
|
||||
--------
|
||||
:pandas_api_docs:pandas.Series.rename
|
||||
:pandas_api_docs:`pandas.Series.rename`
|
||||
|
||||
Examples
|
||||
--------
|
||||
@ -200,12 +235,39 @@ class Series(NDFrame):
|
||||
|
||||
return self._query_compiler.value_counts(es_size)
|
||||
|
||||
# dtype not implemented for Series as causes query to fail
|
||||
# in pandas.core.computation.ops.Term.type
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Rendering Methods
|
||||
def __repr__(self):
|
||||
num_rows = pd.get_option("max_rows") or 60
|
||||
"""
|
||||
Return a string representation for a particular Series.
|
||||
"""
|
||||
buf = StringIO()
|
||||
|
||||
return self.to_string(max_rows=num_rows)
|
||||
# max_rows and max_cols determine the maximum size of the pretty printed tabular
|
||||
# representation of the series. pandas defaults are 60 and 20 respectively.
|
||||
# series where len(series) > max_rows shows a truncated view with 10 rows shown.
|
||||
max_rows = pd.get_option("display.max_rows")
|
||||
min_rows = pd.get_option("display.min_rows")
|
||||
|
||||
if len(self) > max_rows:
|
||||
max_rows = min_rows
|
||||
|
||||
show_dimensions = pd.get_option("display.show_dimensions")
|
||||
|
||||
self.to_string(
|
||||
buf=buf,
|
||||
name=self.name,
|
||||
dtype=True,
|
||||
min_rows=min_rows,
|
||||
max_rows=max_rows,
|
||||
length=show_dimensions,
|
||||
)
|
||||
result = buf.getvalue()
|
||||
|
||||
return result
|
||||
|
||||
def to_string(
|
||||
self,
|
||||
@ -217,33 +279,69 @@ class Series(NDFrame):
|
||||
length=False,
|
||||
dtype=False,
|
||||
name=False,
|
||||
max_rows=None):
|
||||
|
||||
if max_rows is None:
|
||||
max_rows=None,
|
||||
min_rows=None,
|
||||
):
|
||||
# In pandas calling 'to_string' without max_rows set, will dump ALL rows - we avoid this
|
||||
# by limiting rows by default.
|
||||
num_rows = len(self) # avoid multiple calls
|
||||
if num_rows <= DEFAULT_NUM_ROWS_DISPLAYED:
|
||||
if max_rows is None:
|
||||
max_rows = num_rows
|
||||
else:
|
||||
max_rows = min(num_rows, max_rows)
|
||||
elif max_rows is None:
|
||||
warnings.warn("Series.to_string called without max_rows set "
|
||||
"- this will return entire index results. "
|
||||
"Setting max_rows=60, overwrite if different behaviour is required.")
|
||||
max_rows = 60
|
||||
"Setting max_rows={default}"
|
||||
" overwrite if different behaviour is required."
|
||||
.format(default=DEFAULT_NUM_ROWS_DISPLAYED),
|
||||
UserWarning)
|
||||
max_rows = DEFAULT_NUM_ROWS_DISPLAYED
|
||||
|
||||
# because of the way pandas handles max_rows=0, not having this throws an error
|
||||
# see eland issue #56
|
||||
if max_rows == 0:
|
||||
max_rows = 1
|
||||
|
||||
# Create a slightly bigger dataframe than display
|
||||
temp_df = self._build_repr_df(max_rows + 1, None)
|
||||
if isinstance(temp_df, pd.DataFrame):
|
||||
temp_df = temp_df[self.name]
|
||||
temp_str = repr(temp_df)
|
||||
if self.name is not None:
|
||||
name_str = "Name: {}, ".format(str(self.name))
|
||||
temp_series = self._build_repr(max_rows + 1)
|
||||
|
||||
if buf is not None:
|
||||
_buf = _expand_user(_stringify_path(buf))
|
||||
else:
|
||||
name_str = ""
|
||||
if len(self.index) > max_rows:
|
||||
len_str = "Length: {}, ".format(len(self.index))
|
||||
else:
|
||||
len_str = ""
|
||||
dtype_str = "dtype: {}".format(temp_str.rsplit("dtype: ", 1)[-1])
|
||||
if len(self) == 0:
|
||||
return "Series([], {}{}".format(name_str, dtype_str)
|
||||
return temp_str.rsplit("\nName:", 1)[0] + "\n{}{}{}".format(
|
||||
name_str, len_str, dtype_str
|
||||
)
|
||||
_buf = StringIO()
|
||||
|
||||
# Create repr of fake series without name, length, dtype summary
|
||||
temp_str = temp_series.to_string(buf=_buf,
|
||||
na_rep=na_rep,
|
||||
float_format=float_format,
|
||||
header=header,
|
||||
index=index,
|
||||
length=False,
|
||||
dtype=False,
|
||||
name=False,
|
||||
max_rows=max_rows)
|
||||
|
||||
# Create the summary
|
||||
footer = ""
|
||||
if name and self.name is not None:
|
||||
footer += "Name: {}".format(str(self.name))
|
||||
if length and len(self) > max_rows:
|
||||
if footer:
|
||||
footer += ", "
|
||||
footer += "Length: {}".format(len(self.index))
|
||||
if dtype:
|
||||
if footer:
|
||||
footer += ", "
|
||||
footer += "dtype: {}".format(temp_series.dtype)
|
||||
|
||||
if len(footer) > 0:
|
||||
_buf.write("\n{}".format(footer))
|
||||
|
||||
if buf is None:
|
||||
result = _buf.getvalue()
|
||||
return result
|
||||
|
||||
def _to_pandas(self):
|
||||
return self._query_compiler.to_pandas()[self.name]
|
||||
@ -321,13 +419,16 @@ class Series(NDFrame):
|
||||
@property
|
||||
def ndim(self):
|
||||
"""
|
||||
Returns 1 by definition of a Series1
|
||||
Returns 1 by definition of a Series
|
||||
|
||||
Returns
|
||||
-------
|
||||
int
|
||||
By definition 1
|
||||
|
||||
See Also
|
||||
--------
|
||||
:pandas_api_docs:`pandas.Series.ndim`
|
||||
"""
|
||||
return 1
|
||||
|
||||
@ -338,34 +439,317 @@ class Series(NDFrame):
|
||||
|
||||
return buf.getvalue()
|
||||
|
||||
def __truediv__(self, right):
|
||||
return self.truediv(right)
|
||||
|
||||
def truediv(self, right):
|
||||
def __add__(self, right):
|
||||
"""
|
||||
return a / b
|
||||
Return addition of series and right, element-wise (binary operator add).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
right: eland.Series
|
||||
|
||||
Returns
|
||||
-------
|
||||
eland.Series
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> df = ed.DataFrame('localhost', 'ecommerce').head(5)
|
||||
>>> df.taxful_total_price
|
||||
0 36.98
|
||||
1 53.98
|
||||
2 199.98
|
||||
3 174.98
|
||||
4 80.98
|
||||
Name: taxful_total_price, dtype: float64
|
||||
>>> df.total_quantity
|
||||
0 2
|
||||
1 2
|
||||
2 2
|
||||
3 2
|
||||
4 2
|
||||
Name: total_quantity, dtype: int64
|
||||
>>> df.taxful_total_price + df.total_quantity
|
||||
0 38.980000
|
||||
1 55.980000
|
||||
2 201.979996
|
||||
3 176.979996
|
||||
4 82.980003
|
||||
dtype: float64
|
||||
"""
|
||||
return self._numeric_op(right, _get_method_name())
|
||||
|
||||
def __truediv__(self, right):
|
||||
"""
|
||||
Return floating division of series and right, element-wise (binary operator truediv).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
right: eland.Series
|
||||
|
||||
Returns
|
||||
-------
|
||||
eland.Series
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> df = ed.DataFrame('localhost', 'ecommerce').head(5)
|
||||
>>> df.taxful_total_price
|
||||
0 36.98
|
||||
1 53.98
|
||||
2 199.98
|
||||
3 174.98
|
||||
4 80.98
|
||||
Name: taxful_total_price, dtype: float64
|
||||
>>> df.total_quantity
|
||||
0 2
|
||||
1 2
|
||||
2 2
|
||||
3 2
|
||||
4 2
|
||||
Name: total_quantity, dtype: int64
|
||||
>>> df.taxful_total_price / df.total_quantity
|
||||
0 18.490000
|
||||
1 26.990000
|
||||
2 99.989998
|
||||
3 87.489998
|
||||
4 40.490002
|
||||
dtype: float64
|
||||
"""
|
||||
return self._numeric_op(right, _get_method_name())
|
||||
|
||||
def __floordiv__(self, right):
|
||||
"""
|
||||
Return integer division of series and right, element-wise (binary operator floordiv //).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
right: eland.Series
|
||||
|
||||
Returns
|
||||
-------
|
||||
eland.Series
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> df = ed.DataFrame('localhost', 'ecommerce').head(5)
|
||||
>>> df.taxful_total_price
|
||||
0 36.98
|
||||
1 53.98
|
||||
2 199.98
|
||||
3 174.98
|
||||
4 80.98
|
||||
Name: taxful_total_price, dtype: float64
|
||||
>>> df.total_quantity
|
||||
0 2
|
||||
1 2
|
||||
2 2
|
||||
3 2
|
||||
4 2
|
||||
Name: total_quantity, dtype: int64
|
||||
>>> df.taxful_total_price // df.total_quantity
|
||||
0 18.0
|
||||
1 26.0
|
||||
2 99.0
|
||||
3 87.0
|
||||
4 40.0
|
||||
dtype: float64
|
||||
"""
|
||||
return self._numeric_op(right, _get_method_name())
|
||||
|
||||
def __mod__(self, right):
|
||||
"""
|
||||
Return modulo of series and right, element-wise (binary operator mod %).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
right: eland.Series
|
||||
|
||||
Returns
|
||||
-------
|
||||
eland.Series
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> df = ed.DataFrame('localhost', 'ecommerce').head(5)
|
||||
>>> df.taxful_total_price
|
||||
0 36.98
|
||||
1 53.98
|
||||
2 199.98
|
||||
3 174.98
|
||||
4 80.98
|
||||
Name: taxful_total_price, dtype: float64
|
||||
>>> df.total_quantity
|
||||
0 2
|
||||
1 2
|
||||
2 2
|
||||
3 2
|
||||
4 2
|
||||
Name: total_quantity, dtype: int64
|
||||
>>> df.taxful_total_price % df.total_quantity
|
||||
0 0.980000
|
||||
1 1.980000
|
||||
2 1.979996
|
||||
3 0.979996
|
||||
4 0.980003
|
||||
dtype: float64
|
||||
"""
|
||||
return self._numeric_op(right, _get_method_name())
|
||||
|
||||
def __mul__(self, right):
|
||||
"""
|
||||
Return multiplication of series and right, element-wise (binary operator mul).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
right: eland.Series
|
||||
|
||||
Returns
|
||||
-------
|
||||
eland.Series
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> df = ed.DataFrame('localhost', 'ecommerce').head(5)
|
||||
>>> df.taxful_total_price
|
||||
0 36.98
|
||||
1 53.98
|
||||
2 199.98
|
||||
3 174.98
|
||||
4 80.98
|
||||
Name: taxful_total_price, dtype: float64
|
||||
>>> df.total_quantity
|
||||
0 2
|
||||
1 2
|
||||
2 2
|
||||
3 2
|
||||
4 2
|
||||
Name: total_quantity, dtype: int64
|
||||
>>> df.taxful_total_price * df.total_quantity
|
||||
0 73.959999
|
||||
1 107.959999
|
||||
2 399.959991
|
||||
3 349.959991
|
||||
4 161.960007
|
||||
dtype: float64
|
||||
"""
|
||||
return self._numeric_op(right, _get_method_name())
|
||||
|
||||
def __sub__(self, right):
|
||||
"""
|
||||
Return subtraction of series and right, element-wise (binary operator sub).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
right: eland.Series
|
||||
|
||||
Returns
|
||||
-------
|
||||
eland.Series
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> df = ed.DataFrame('localhost', 'ecommerce').head(5)
|
||||
>>> df.taxful_total_price
|
||||
0 36.98
|
||||
1 53.98
|
||||
2 199.98
|
||||
3 174.98
|
||||
4 80.98
|
||||
Name: taxful_total_price, dtype: float64
|
||||
>>> df.total_quantity
|
||||
0 2
|
||||
1 2
|
||||
2 2
|
||||
3 2
|
||||
4 2
|
||||
Name: total_quantity, dtype: int64
|
||||
>>> df.taxful_total_price - df.total_quantity
|
||||
0 34.980000
|
||||
1 51.980000
|
||||
2 197.979996
|
||||
3 172.979996
|
||||
4 78.980003
|
||||
dtype: float64
|
||||
"""
|
||||
return self._numeric_op(right, _get_method_name())
|
||||
|
||||
def __pow__(self, right):
|
||||
"""
|
||||
Return exponential power of series and right, element-wise (binary operator pow \*\*).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
right: eland.Series
|
||||
|
||||
Returns
|
||||
-------
|
||||
eland.Series
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> df = ed.DataFrame('localhost', 'ecommerce').head(5)
|
||||
>>> df.taxful_total_price
|
||||
0 36.98
|
||||
1 53.98
|
||||
2 199.98
|
||||
3 174.98
|
||||
4 80.98
|
||||
Name: taxful_total_price, dtype: float64
|
||||
>>> df.total_quantity
|
||||
0 2
|
||||
1 2
|
||||
2 2
|
||||
3 2
|
||||
4 2
|
||||
Name: total_quantity, dtype: int64
|
||||
>>> df.taxful_total_price ** df.total_quantity
|
||||
0 1367.520366
|
||||
1 2913.840351
|
||||
2 39991.998691
|
||||
3 30617.998905
|
||||
4 6557.760944
|
||||
dtype: float64
|
||||
"""
|
||||
return self._numeric_op(right, _get_method_name())
|
||||
|
||||
add = __add__
|
||||
div = __truediv__
|
||||
divide = __truediv__
|
||||
floordiv = __floordiv__
|
||||
mod = __mod__
|
||||
mul = __mul__
|
||||
multiply = __mul__
|
||||
pow = __pow__
|
||||
sub = __sub__
|
||||
subtract = __sub__
|
||||
truediv = __truediv__
|
||||
|
||||
def _numeric_op(self, right, method_name):
|
||||
"""
|
||||
return a op b
|
||||
|
||||
a & b == Series
|
||||
a & b must share same eland.Client, index_pattern and index_field
|
||||
a == Series, b == numeric
|
||||
"""
|
||||
if isinstance(right, Series):
|
||||
# Check compatibility
|
||||
self._query_compiler.check_arithmetics(right._query_compiler)
|
||||
|
||||
new_field_name = "{0}_{1}_{2}".format(self.name, "truediv", right.name)
|
||||
new_field_name = "{0}_{1}_{2}".format(self.name, method_name, right.name)
|
||||
|
||||
# Compatible, so create new Series
|
||||
series = Series(query_compiler=self._query_compiler.arithmetic_op_fields(
|
||||
new_field_name, 'truediv', self.name, right.name))
|
||||
new_field_name, method_name, self.name, right.name))
|
||||
series.name = None
|
||||
|
||||
return series
|
||||
elif isinstance(right, (int, float)): # TODO extend to numpy types
|
||||
new_field_name = "{0}_{1}_{2}".format(self.name, "truediv", str(right).replace('.','_'))
|
||||
elif isinstance(right, (int, float)): # TODO extend to numpy types
|
||||
new_field_name = "{0}_{1}_{2}".format(self.name, method_name, str(right).replace('.', '_'))
|
||||
|
||||
# Compatible, so create new Series
|
||||
series = Series(query_compiler=self._query_compiler.arithmetic_op_fields(
|
||||
new_field_name, 'truediv', self.name, float(right))) # force rhs to float
|
||||
new_field_name, method_name, self.name, float(right))) # force rhs to float
|
||||
|
||||
# name of Series remains original name
|
||||
series.name = self.name
|
||||
@ -374,5 +758,123 @@ class Series(NDFrame):
|
||||
else:
|
||||
raise TypeError(
|
||||
"Can only perform arithmetic operation on selected types "
|
||||
"{0} != {1}".format(type(self), type(right))
|
||||
"{0} != {1} for {2}".format(type(self), type(right), method_name)
|
||||
)
|
||||
|
||||
def max(self):
|
||||
"""
|
||||
Return the maximum of the Series values
|
||||
|
||||
TODO - implement remainder of pandas arguments, currently non-numerics are not supported
|
||||
|
||||
Returns
|
||||
-------
|
||||
float
|
||||
max value
|
||||
|
||||
See Also
|
||||
--------
|
||||
:pandas_api_docs:`pandas.Series.max`
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> s = ed.Series('localhost', 'flights', name='AvgTicketPrice')
|
||||
>>> int(s.max())
|
||||
1199
|
||||
"""
|
||||
results = super().max()
|
||||
return results.squeeze()
|
||||
|
||||
def mean(self):
|
||||
"""
|
||||
Return the mean of the Series values
|
||||
|
||||
TODO - implement remainder of pandas arguments, currently non-numerics are not supported
|
||||
|
||||
Returns
|
||||
-------
|
||||
float
|
||||
mean value
|
||||
|
||||
See Also
|
||||
--------
|
||||
:pandas_api_docs:`pandas.Series.mean`
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> s = ed.Series('localhost', 'flights', name='AvgTicketPrice')
|
||||
>>> int(s.mean())
|
||||
628
|
||||
"""
|
||||
results = super().mean()
|
||||
return results.squeeze()
|
||||
|
||||
def min(self):
|
||||
"""
|
||||
Return the minimum of the Series values
|
||||
|
||||
TODO - implement remainder of pandas arguments, currently non-numerics are not supported
|
||||
|
||||
Returns
|
||||
-------
|
||||
float
|
||||
min value
|
||||
|
||||
See Also
|
||||
--------
|
||||
:pandas_api_docs:`pandas.Series.min`
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> s = ed.Series('localhost', 'flights', name='AvgTicketPrice')
|
||||
>>> int(s.min())
|
||||
100
|
||||
"""
|
||||
results = super().min()
|
||||
return results.squeeze()
|
||||
|
||||
def sum(self):
|
||||
"""
|
||||
Return the sum of the Series values
|
||||
|
||||
TODO - implement remainder of pandas arguments, currently non-numerics are not supported
|
||||
|
||||
Returns
|
||||
-------
|
||||
float
|
||||
sum of the values
|
||||
|
||||
See Also
|
||||
--------
|
||||
:pandas_api_docs:`pandas.Series.sum`
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> s = ed.Series('localhost', 'flights', name='AvgTicketPrice')
|
||||
>>> int(s.sum())
|
||||
8204364
|
||||
"""
|
||||
results = super().sum()
|
||||
return results.squeeze()
|
||||
|
||||
def nunique(self):
|
||||
"""
|
||||
Return the number of unique values in the Series
|
||||
|
||||
Returns
|
||||
-------
|
||||
int
|
||||
number of unique values
|
||||
|
||||
See Also
|
||||
--------
|
||||
:pandas_api_docs:`pandas.Series.nunique`
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> s = ed.Series('localhost', 'flights', name='Carrier')
|
||||
>>> s.nunique()
|
||||
4
|
||||
"""
|
||||
results = super().nunique()
|
||||
return results.squeeze()
|
||||
|
@ -279,10 +279,10 @@ ECOMMERCE_MAPPING = {"mappings": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"taxful_total_price": {
|
||||
"type": "half_float"
|
||||
"type": "float"
|
||||
},
|
||||
"taxless_total_price": {
|
||||
"type": "half_float"
|
||||
"type": "float"
|
||||
},
|
||||
"total_quantity": {
|
||||
"type": "integer"
|
||||
|
@ -4,6 +4,8 @@ from eland.tests.common import TestData, assert_pandas_eland_series_equal
|
||||
from pandas.util.testing import assert_series_equal
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
class TestSeriesArithmetics(TestData):
|
||||
|
||||
@ -15,29 +17,35 @@ class TestSeriesArithmetics(TestData):
|
||||
with pytest.raises(TypeError):
|
||||
ed_df['total_quantity'] / pd_df['taxful_total_price']
|
||||
|
||||
def test_ecommerce_series_div(self):
|
||||
pd_df = self.pd_ecommerce()
|
||||
ed_df = self.ed_ecommerce()
|
||||
def test_ecommerce_series_basic_arithmetics(self):
|
||||
pd_df = self.pd_ecommerce().head(100)
|
||||
ed_df = self.ed_ecommerce().head(100)
|
||||
|
||||
pd_avg_price = pd_df['total_quantity'] / pd_df['taxful_total_price']
|
||||
ed_avg_price = ed_df['total_quantity'] / ed_df['taxful_total_price']
|
||||
ops = ['__add__',
|
||||
'__truediv__',
|
||||
'__floordiv__',
|
||||
'__pow__',
|
||||
'__mod__',
|
||||
'__mul__',
|
||||
'__sub__',
|
||||
'add',
|
||||
'truediv',
|
||||
'floordiv',
|
||||
'pow',
|
||||
'mod',
|
||||
'mul',
|
||||
'sub']
|
||||
|
||||
assert_pandas_eland_series_equal(pd_avg_price, ed_avg_price, check_less_precise=True)
|
||||
for op in ops:
|
||||
pd_series = getattr(pd_df['taxful_total_price'], op)(pd_df['total_quantity'])
|
||||
ed_series = getattr(ed_df['taxful_total_price'], op)(ed_df['total_quantity'])
|
||||
assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True)
|
||||
|
||||
def test_ecommerce_series_div_float(self):
|
||||
pd_df = self.pd_ecommerce()
|
||||
ed_df = self.ed_ecommerce()
|
||||
pd_series = getattr(pd_df['taxful_total_price'], op)(10.56)
|
||||
ed_series = getattr(ed_df['taxful_total_price'], op)(10.56)
|
||||
assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True)
|
||||
|
||||
pd_avg_price = pd_df['total_quantity'] / 10.0
|
||||
ed_avg_price = ed_df['total_quantity'] / 10.0
|
||||
pd_series = getattr(pd_df['taxful_total_price'], op)(int(8))
|
||||
ed_series = getattr(ed_df['taxful_total_price'], op)(int(8))
|
||||
assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True)
|
||||
|
||||
assert_pandas_eland_series_equal(pd_avg_price, ed_avg_price, check_less_precise=True)
|
||||
|
||||
def test_ecommerce_series_div_int(self):
|
||||
pd_df = self.pd_ecommerce()
|
||||
ed_df = self.ed_ecommerce()
|
||||
|
||||
pd_avg_price = pd_df['total_quantity'] / int(10)
|
||||
ed_avg_price = ed_df['total_quantity'] / int(10)
|
||||
|
||||
assert_pandas_eland_series_equal(pd_avg_price, ed_avg_price, check_less_precise=True)
|
||||
|
17
eland/tests/series/test_info_es_pytest.py
Normal file
17
eland/tests/series/test_info_es_pytest.py
Normal file
@ -0,0 +1,17 @@
|
||||
# File called _pytest for PyCharm compatibility
|
||||
|
||||
from pandas.util.testing import assert_almost_equal
|
||||
|
||||
from eland.tests.common import TestData
|
||||
|
||||
import eland as ed
|
||||
|
||||
|
||||
class TestSeriesInfoEs(TestData):
|
||||
|
||||
def test_flights_info_es(self):
|
||||
ed_flights = self.ed_flights()['AvgTicketPrice']
|
||||
|
||||
# No assertion, just test it can be called
|
||||
info_es = ed_flights.info_es()
|
||||
|
44
eland/tests/series/test_metrics_pytest.py
Normal file
44
eland/tests/series/test_metrics_pytest.py
Normal file
@ -0,0 +1,44 @@
|
||||
# File called _pytest for PyCharm compatibility
|
||||
|
||||
from pandas.util.testing import assert_almost_equal
|
||||
|
||||
from eland.tests.common import TestData
|
||||
|
||||
import eland as ed
|
||||
|
||||
|
||||
class TestSeriesMetrics(TestData):
|
||||
|
||||
funcs = ['max', 'min', 'mean', 'sum']
|
||||
|
||||
def test_flights_metrics(self):
|
||||
pd_flights = self.pd_flights()['AvgTicketPrice']
|
||||
ed_flights = self.ed_flights()['AvgTicketPrice']
|
||||
|
||||
for func in self.funcs:
|
||||
pd_metric = getattr(pd_flights, func)()
|
||||
ed_metric = getattr(ed_flights, func)()
|
||||
assert_almost_equal(pd_metric, ed_metric, check_less_precise=True)
|
||||
|
||||
def test_ecommerce_selected_non_numeric_source_fields(self):
|
||||
# None of these are numeric
|
||||
column = 'category'
|
||||
|
||||
ed_ecommerce = self.ed_ecommerce()[column]
|
||||
|
||||
for func in self.funcs:
|
||||
ed_metric = getattr(ed_ecommerce, func)()
|
||||
assert ed_metric.empty
|
||||
|
||||
|
||||
def test_ecommerce_selected_all_numeric_source_fields(self):
|
||||
# All of these are numeric
|
||||
columns = ['total_quantity', 'taxful_total_price', 'taxless_total_price']
|
||||
|
||||
for column in columns:
|
||||
pd_ecommerce = self.pd_ecommerce()[column]
|
||||
ed_ecommerce = self.ed_ecommerce()[column]
|
||||
|
||||
for func in self.funcs:
|
||||
assert_almost_equal(getattr(pd_ecommerce, func)(), getattr(ed_ecommerce, func)(),
|
||||
check_less_precise=True)
|
@ -1,13 +1,14 @@
|
||||
# File called _pytest for PyCharm compatibility
|
||||
import eland as ed
|
||||
import pandas as pd
|
||||
from eland.tests import ELASTICSEARCH_HOST
|
||||
from eland.tests import FLIGHTS_INDEX_NAME
|
||||
from eland.tests import FLIGHTS_INDEX_NAME, ECOMMERCE_INDEX_NAME
|
||||
from eland.tests.common import TestData
|
||||
|
||||
|
||||
class TestSeriesRepr(TestData):
|
||||
|
||||
def test_repr(self):
|
||||
def test_repr_flights_carrier(self):
|
||||
pd_s = self.pd_flights()['Carrier']
|
||||
ed_s = ed.Series(ELASTICSEARCH_HOST, FLIGHTS_INDEX_NAME, 'Carrier')
|
||||
|
||||
@ -15,3 +16,12 @@ class TestSeriesRepr(TestData):
|
||||
ed_repr = repr(ed_s)
|
||||
|
||||
assert pd_repr == ed_repr
|
||||
|
||||
def test_repr_flights_carrier_5(self):
|
||||
pd_s = self.pd_flights()['Carrier'].head(5)
|
||||
ed_s = ed.Series(ELASTICSEARCH_HOST, FLIGHTS_INDEX_NAME, 'Carrier').head(5)
|
||||
|
||||
pd_repr = repr(pd_s)
|
||||
ed_repr = repr(ed_s)
|
||||
|
||||
assert pd_repr == ed_repr
|
||||
|
Loading…
x
Reference in New Issue
Block a user