mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Added Series metric aggs + Series docs
Also, improved Series.to_string()
This commit is contained in:
parent
5d119215f8
commit
84e23ab5d1
6
docs/source/reference/api/eland.Series.add.rst
Normal file
6
docs/source/reference/api/eland.Series.add.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.add
|
||||||
|
================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: Series.add
|
6
docs/source/reference/api/eland.Series.describe.rst
Normal file
6
docs/source/reference/api/eland.Series.describe.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.describe
|
||||||
|
=====================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: Series.describe
|
6
docs/source/reference/api/eland.Series.div.rst
Normal file
6
docs/source/reference/api/eland.Series.div.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.div
|
||||||
|
================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: Series.div
|
6
docs/source/reference/api/eland.Series.empty.rst
Normal file
6
docs/source/reference/api/eland.Series.empty.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.empty
|
||||||
|
==================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. autoattribute:: Series.empty
|
6
docs/source/reference/api/eland.Series.floordiv.rst
Normal file
6
docs/source/reference/api/eland.Series.floordiv.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.floordiv
|
||||||
|
=====================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: Series.floordiv
|
6
docs/source/reference/api/eland.Series.head.rst
Normal file
6
docs/source/reference/api/eland.Series.head.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.head
|
||||||
|
=================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: Series.head
|
6
docs/source/reference/api/eland.Series.index.rst
Normal file
6
docs/source/reference/api/eland.Series.index.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.index
|
||||||
|
==================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. autoattribute:: Series.index
|
6
docs/source/reference/api/eland.Series.max.rst
Normal file
6
docs/source/reference/api/eland.Series.max.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.max
|
||||||
|
================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: Series.max
|
6
docs/source/reference/api/eland.Series.mean.rst
Normal file
6
docs/source/reference/api/eland.Series.mean.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.mean
|
||||||
|
=================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: Series.mean
|
6
docs/source/reference/api/eland.Series.min.rst
Normal file
6
docs/source/reference/api/eland.Series.min.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.min
|
||||||
|
================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: Series.min
|
6
docs/source/reference/api/eland.Series.mod.rst
Normal file
6
docs/source/reference/api/eland.Series.mod.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.mod
|
||||||
|
================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: Series.mod
|
6
docs/source/reference/api/eland.Series.mul.rst
Normal file
6
docs/source/reference/api/eland.Series.mul.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.mul
|
||||||
|
================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: Series.mul
|
6
docs/source/reference/api/eland.Series.name.rst
Normal file
6
docs/source/reference/api/eland.Series.name.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.name
|
||||||
|
=================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. autoattribute:: Series.name
|
6
docs/source/reference/api/eland.Series.nunique.rst
Normal file
6
docs/source/reference/api/eland.Series.nunique.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.nunique
|
||||||
|
====================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: Series.nunique
|
6
docs/source/reference/api/eland.Series.pow.rst
Normal file
6
docs/source/reference/api/eland.Series.pow.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.pow
|
||||||
|
================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: Series.pow
|
6
docs/source/reference/api/eland.Series.rename.rst
Normal file
6
docs/source/reference/api/eland.Series.rename.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.rename
|
||||||
|
===================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: Series.rename
|
6
docs/source/reference/api/eland.Series.rst
Normal file
6
docs/source/reference/api/eland.Series.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series
|
||||||
|
============
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. autoclass:: Series
|
6
docs/source/reference/api/eland.Series.shape.rst
Normal file
6
docs/source/reference/api/eland.Series.shape.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.shape
|
||||||
|
==================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. autoattribute:: Series.shape
|
6
docs/source/reference/api/eland.Series.sub.rst
Normal file
6
docs/source/reference/api/eland.Series.sub.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.sub
|
||||||
|
================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: Series.sub
|
6
docs/source/reference/api/eland.Series.sum.rst
Normal file
6
docs/source/reference/api/eland.Series.sum.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.sum
|
||||||
|
================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: Series.sum
|
6
docs/source/reference/api/eland.Series.tail.rst
Normal file
6
docs/source/reference/api/eland.Series.tail.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.tail
|
||||||
|
=================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: Series.tail
|
6
docs/source/reference/api/eland.Series.to_string.rst
Normal file
6
docs/source/reference/api/eland.Series.to_string.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.to_string
|
||||||
|
======================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: Series.to_string
|
6
docs/source/reference/api/eland.Series.truediv.rst
Normal file
6
docs/source/reference/api/eland.Series.truediv.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.truediv
|
||||||
|
====================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: Series.truediv
|
@ -1,5 +1,5 @@
|
|||||||
eland.Series.value_counts
|
eland.Series.value_counts
|
||||||
===========================
|
=========================
|
||||||
|
|
||||||
.. currentmodule:: eland
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
@ -91,5 +91,3 @@ Elasticsearch utilities
|
|||||||
:toctree: api/
|
:toctree: api/
|
||||||
|
|
||||||
DataFrame.info_es
|
DataFrame.info_es
|
||||||
|
|
||||||
|
|
||||||
|
@ -5,9 +5,77 @@ Series
|
|||||||
=========
|
=========
|
||||||
.. currentmodule:: eland
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
Constructor
|
||||||
|
~~~~~~~~~~~
|
||||||
|
.. autosummary::
|
||||||
|
:toctree: api/
|
||||||
|
|
||||||
|
Series
|
||||||
|
|
||||||
|
Attributes and underlying data
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
**Axes**
|
||||||
|
|
||||||
|
.. autosummary::
|
||||||
|
:toctree: api/
|
||||||
|
|
||||||
|
Series.index
|
||||||
|
Series.shape
|
||||||
|
Series.name
|
||||||
|
Series.empty
|
||||||
|
|
||||||
|
Indexing, iteration
|
||||||
|
~~~~~~~~~~~~~~~~~~~
|
||||||
|
.. autosummary::
|
||||||
|
:toctree: api/
|
||||||
|
|
||||||
|
Series.head
|
||||||
|
Series.tail
|
||||||
|
|
||||||
|
Binary operator functions
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
.. autosummary::
|
||||||
|
:toctree: api/
|
||||||
|
|
||||||
|
Series.add
|
||||||
|
Series.sub
|
||||||
|
Series.mul
|
||||||
|
Series.div
|
||||||
|
Series.truediv
|
||||||
|
Series.floordiv
|
||||||
|
Series.mod
|
||||||
|
Series.pow
|
||||||
|
|
||||||
Computations / descriptive stats
|
Computations / descriptive stats
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
:toctree: api/
|
:toctree: api/
|
||||||
|
|
||||||
|
Series.describe
|
||||||
|
Series.max
|
||||||
|
Series.mean
|
||||||
|
Series.min
|
||||||
|
Series.sum
|
||||||
|
Series.nunique
|
||||||
Series.value_counts
|
Series.value_counts
|
||||||
|
|
||||||
|
Reindexing / selection / label manipulation
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
.. autosummary::
|
||||||
|
:toctree: api/
|
||||||
|
|
||||||
|
Series.rename
|
||||||
|
|
||||||
|
Serialization / IO / conversion
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
.. autosummary::
|
||||||
|
:toctree: api/
|
||||||
|
|
||||||
|
Series.to_string
|
||||||
|
|
||||||
|
Elasticsearch utilities
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
.. autosummary::
|
||||||
|
:toctree: api/
|
||||||
|
|
||||||
|
Series.info_es
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
from eland.common import *
|
||||||
from eland.client import *
|
from eland.client import *
|
||||||
from eland.filter import *
|
from eland.filter import *
|
||||||
from eland.index import *
|
from eland.index import *
|
||||||
|
8
eland/common.py
Normal file
8
eland/common.py
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
# Default number of rows displayed (different to pandas where ALL could be displayed)
|
||||||
|
DEFAULT_NUM_ROWS_DISPLAYED = 60
|
||||||
|
|
||||||
|
def docstring_parameter(*sub):
|
||||||
|
def dec(obj):
|
||||||
|
obj.__doc__ = obj.__doc__.format(*sub)
|
||||||
|
return obj
|
||||||
|
return dec
|
@ -18,15 +18,7 @@ import eland.plotting as gfx
|
|||||||
from eland import NDFrame
|
from eland import NDFrame
|
||||||
from eland import Series
|
from eland import Series
|
||||||
from eland.filter import BooleanFilter, ScriptFilter
|
from eland.filter import BooleanFilter, ScriptFilter
|
||||||
|
from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, docstring_parameter
|
||||||
# Default number of rows displayed (different to pandas where ALL could be displayed)
|
|
||||||
DEFAULT_NUM_ROWS_DISPLAYED = 60
|
|
||||||
|
|
||||||
def docstring_parameter(*sub):
|
|
||||||
def dec(obj):
|
|
||||||
obj.__doc__ = obj.__doc__.format(*sub)
|
|
||||||
return obj
|
|
||||||
return dec
|
|
||||||
|
|
||||||
|
|
||||||
class DataFrame(NDFrame):
|
class DataFrame(NDFrame):
|
||||||
@ -43,7 +35,7 @@ class DataFrame(NDFrame):
|
|||||||
- elasticsearch-py instance or
|
- elasticsearch-py instance or
|
||||||
- eland.Client instance
|
- eland.Client instance
|
||||||
index_pattern: str
|
index_pattern: str
|
||||||
Elasticsearch index pattern (e.g. 'flights' or 'filebeat-*')
|
Elasticsearch index pattern (e.g. 'flights' or 'filebeat-\*')
|
||||||
columns: list of str, optional
|
columns: list of str, optional
|
||||||
List of DataFrame columns. A subset of the Elasticsearch index's fields.
|
List of DataFrame columns. A subset of the Elasticsearch index's fields.
|
||||||
index_field: str, optional
|
index_field: str, optional
|
||||||
@ -98,7 +90,6 @@ class DataFrame(NDFrame):
|
|||||||
<BLANKLINE>
|
<BLANKLINE>
|
||||||
[5 rows x 2 columns]
|
[5 rows x 2 columns]
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
client=None,
|
client=None,
|
||||||
index_pattern=None,
|
index_pattern=None,
|
||||||
@ -586,7 +577,7 @@ class DataFrame(NDFrame):
|
|||||||
max_rows = 1
|
max_rows = 1
|
||||||
|
|
||||||
# Create a slightly bigger dataframe than display
|
# Create a slightly bigger dataframe than display
|
||||||
df = self._build_repr_df(max_rows + 1, max_cols)
|
df = self._build_repr(max_rows + 1)
|
||||||
|
|
||||||
if buf is not None:
|
if buf is not None:
|
||||||
_buf = _expand_user(_stringify_path(buf))
|
_buf = _expand_user(_stringify_path(buf))
|
||||||
@ -651,7 +642,7 @@ class DataFrame(NDFrame):
|
|||||||
max_rows = 1
|
max_rows = 1
|
||||||
|
|
||||||
# Create a slightly bigger dataframe than display
|
# Create a slightly bigger dataframe than display
|
||||||
df = self._build_repr_df(max_rows + 1, max_cols)
|
df = self._build_repr(max_rows + 1)
|
||||||
|
|
||||||
if buf is not None:
|
if buf is not None:
|
||||||
_buf = _expand_user(_stringify_path(buf))
|
_buf = _expand_user(_stringify_path(buf))
|
||||||
@ -1064,3 +1055,48 @@ class DataFrame(NDFrame):
|
|||||||
return self._getitem(key)
|
return self._getitem(key)
|
||||||
else:
|
else:
|
||||||
return default
|
return default
|
||||||
|
|
||||||
|
@property
|
||||||
|
def values(self):
|
||||||
|
"""
|
||||||
|
Not implemented.
|
||||||
|
|
||||||
|
In pandas this returns a NumPy representation of the DataFrame. This would involve scanning/scrolling the
|
||||||
|
entire index.
|
||||||
|
|
||||||
|
If this is required, call ``ed.eland_to_pandas(ed_df).values``, *but beware this will scan/scroll the entire
|
||||||
|
Elasticsearch index(es) into memory*.
|
||||||
|
|
||||||
|
See Also
|
||||||
|
--------
|
||||||
|
:pandas_api_docs:`pandas.DataFrame.values`
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> ed_df = ed.DataFrame('localhost', 'flights', columns=['AvgTicketPrice', 'Carrier']).head(5)
|
||||||
|
>>> pd_df = ed.eland_to_pandas(ed_df)
|
||||||
|
>>> print("type(ed_df)={0}\\ntype(pd_df)={1}".format(type(ed_df), type(pd_df)))
|
||||||
|
type(ed_df)=<class 'eland.dataframe.DataFrame'>
|
||||||
|
type(pd_df)=<class 'pandas.core.frame.DataFrame'>
|
||||||
|
>>> ed_df
|
||||||
|
AvgTicketPrice Carrier
|
||||||
|
0 841.265642 Kibana Airlines
|
||||||
|
1 882.982662 Logstash Airways
|
||||||
|
2 190.636904 Logstash Airways
|
||||||
|
3 181.694216 Kibana Airlines
|
||||||
|
4 730.041778 Kibana Airlines
|
||||||
|
<BLANKLINE>
|
||||||
|
[5 rows x 2 columns]
|
||||||
|
>>> pd_df.values
|
||||||
|
array([[841.2656419677076, 'Kibana Airlines'],
|
||||||
|
[882.9826615595518, 'Logstash Airways'],
|
||||||
|
[190.6369038508356, 'Logstash Airways'],
|
||||||
|
[181.69421554118, 'Kibana Airlines'],
|
||||||
|
[730.041778346198, 'Kibana Airlines']], dtype=object)
|
||||||
|
"""
|
||||||
|
raise NotImplementedError(
|
||||||
|
"This method would scan/scroll the entire Elasticsearch index(s) into memory."
|
||||||
|
"If this is explicitly required and there is sufficient memory, call `ed.eland_to_pandas(ed_df).values`"
|
||||||
|
)
|
||||||
|
|
||||||
|
to_numpy = values
|
||||||
|
@ -31,7 +31,6 @@ from pandas.util._validators import validate_bool_kwarg
|
|||||||
|
|
||||||
from eland import ElandQueryCompiler
|
from eland import ElandQueryCompiler
|
||||||
|
|
||||||
|
|
||||||
class NDFrame:
|
class NDFrame:
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
@ -65,6 +64,7 @@ class NDFrame:
|
|||||||
See Also
|
See Also
|
||||||
--------
|
--------
|
||||||
:pandas_api_docs:`pandas.DataFrame.index`
|
:pandas_api_docs:`pandas.DataFrame.index`
|
||||||
|
:pandas_api_docs:`pandas.Series.index`
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@ -72,6 +72,10 @@ class NDFrame:
|
|||||||
>>> assert isinstance(df.index, ed.Index)
|
>>> assert isinstance(df.index, ed.Index)
|
||||||
>>> df.index.index_field
|
>>> df.index.index_field
|
||||||
'_id'
|
'_id'
|
||||||
|
>>> s = df['Carrier']
|
||||||
|
>>> assert isinstance(s.index, ed.Index)
|
||||||
|
>>> s.index.index_field
|
||||||
|
'_id'
|
||||||
"""
|
"""
|
||||||
return self._query_compiler.index
|
return self._query_compiler.index
|
||||||
|
|
||||||
@ -104,9 +108,8 @@ class NDFrame:
|
|||||||
"""
|
"""
|
||||||
return self._query_compiler.dtypes
|
return self._query_compiler.dtypes
|
||||||
|
|
||||||
def _build_repr_df(self, num_rows, num_cols):
|
def _build_repr(self, num_rows):
|
||||||
# Overriden version of BasePandasDataset._build_repr_df
|
# self could be Series or DataFrame
|
||||||
# to avoid issues with concat
|
|
||||||
if len(self.index) <= num_rows:
|
if len(self.index) <= num_rows:
|
||||||
return self._to_pandas()
|
return self._to_pandas()
|
||||||
|
|
||||||
|
@ -588,6 +588,7 @@ class Operations:
|
|||||||
df = self._apply_df_post_processing(df, post_processing)
|
df = self._apply_df_post_processing(df, post_processing)
|
||||||
collector.collect(df)
|
collector.collect(df)
|
||||||
|
|
||||||
|
|
||||||
def iloc(self, index, field_names):
|
def iloc(self, index, field_names):
|
||||||
# index and field_names are indexers
|
# index and field_names are indexers
|
||||||
task = ('iloc', (index, field_names))
|
task = ('iloc', (index, field_names))
|
||||||
@ -881,9 +882,10 @@ class Operations:
|
|||||||
left_field = item[1][1][1][0]
|
left_field = item[1][1][1][0]
|
||||||
right_field = item[1][1][1][1]
|
right_field = item[1][1][1][1]
|
||||||
|
|
||||||
|
# https://www.elastic.co/guide/en/elasticsearch/painless/current/painless-api-reference-shared-java-lang.html#painless-api-reference-shared-Math
|
||||||
if isinstance(right_field, str):
|
if isinstance(right_field, str):
|
||||||
"""
|
"""
|
||||||
(if op_name = 'truediv')
|
(if op_name = '__truediv__')
|
||||||
|
|
||||||
"script_fields": {
|
"script_fields": {
|
||||||
"field_name": {
|
"field_name": {
|
||||||
@ -893,12 +895,23 @@ class Operations:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
if op_name == 'truediv':
|
if op_name == '__add__':
|
||||||
op = '/'
|
source = "doc['{0}'].value + doc['{1}'].value".format(left_field, right_field)
|
||||||
|
elif op_name == '__truediv__':
|
||||||
|
source = "doc['{0}'].value / doc['{1}'].value".format(left_field, right_field)
|
||||||
|
elif op_name == '__floordiv__':
|
||||||
|
source = "Math.floor(doc['{0}'].value / doc['{1}'].value)".format(left_field, right_field)
|
||||||
|
elif op_name == '__pow__':
|
||||||
|
source = "Math.pow(doc['{0}'].value, doc['{1}'].value)".format(left_field, right_field)
|
||||||
|
elif op_name == '__mod__':
|
||||||
|
source = "doc['{0}'].value % doc['{1}'].value".format(left_field, right_field)
|
||||||
|
elif op_name == '__mul__':
|
||||||
|
source = "doc['{0}'].value * doc['{1}'].value".format(left_field, right_field)
|
||||||
|
elif op_name == '__sub__':
|
||||||
|
source = "doc['{0}'].value - doc['{1}'].value".format(left_field, right_field)
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError("Not implemented operation '{0}'".format(op_name))
|
raise NotImplementedError("Not implemented operation '{0}'".format(op_name))
|
||||||
|
|
||||||
source = "doc['{0}'].value {1} doc['{2}'].value".format(left_field, op, right_field)
|
|
||||||
|
|
||||||
if query_params['query_script_fields'] is None:
|
if query_params['query_script_fields'] is None:
|
||||||
query_params['query_script_fields'] = {}
|
query_params['query_script_fields'] = {}
|
||||||
@ -909,7 +922,7 @@ class Operations:
|
|||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
"""
|
"""
|
||||||
(if op_name = 'truediv')
|
(if op_name = '__truediv__')
|
||||||
|
|
||||||
"script_fields": {
|
"script_fields": {
|
||||||
"field_name": {
|
"field_name": {
|
||||||
@ -919,12 +932,23 @@ class Operations:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
if op_name == 'truediv':
|
if op_name == '__add__':
|
||||||
op = '/'
|
source = "doc['{0}'].value + {1}".format(left_field, right_field)
|
||||||
|
elif op_name == '__truediv__':
|
||||||
|
source = "doc['{0}'].value / {1}".format(left_field, right_field)
|
||||||
|
elif op_name == '__floordiv__':
|
||||||
|
source = "Math.floor(doc['{0}'].value / {1})".format(left_field, right_field)
|
||||||
|
elif op_name == '__pow__':
|
||||||
|
source = "Math.pow(doc['{0}'].value, {1})".format(left_field, right_field)
|
||||||
|
elif op_name == '__mod__':
|
||||||
|
source = "doc['{0}'].value % {1}".format(left_field, right_field)
|
||||||
|
elif op_name == '__mul__':
|
||||||
|
source = "doc['{0}'].value * {1}".format(left_field, right_field)
|
||||||
|
elif op_name == '__sub__':
|
||||||
|
source = "doc['{0}'].value - {1}".format(left_field, right_field)
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError("Not implemented operation '{0}'".format(op_name))
|
raise NotImplementedError("Not implemented operation '{0}'".format(op_name))
|
||||||
|
|
||||||
source = "doc['{0}'].value {1} {2}".format(left_field, op, right_field)
|
|
||||||
|
|
||||||
if query_params['query_script_fields'] is None:
|
if query_params['query_script_fields'] is None:
|
||||||
query_params['query_script_fields'] = {}
|
query_params['query_script_fields'] = {}
|
||||||
|
@ -239,9 +239,9 @@ class ElandQueryCompiler:
|
|||||||
# Create pandas DataFrame
|
# Create pandas DataFrame
|
||||||
df = pd.DataFrame(data=rows, index=index)
|
df = pd.DataFrame(data=rows, index=index)
|
||||||
|
|
||||||
# _source may not contain all columns in the mapping
|
# _source may not contain all field_names in the mapping
|
||||||
# therefore, fill in missing columns
|
# therefore, fill in missing field_names
|
||||||
# (note this returns self.columns NOT IN df.columns)
|
# (note this returns self.field_names NOT IN df.columns)
|
||||||
missing_field_names = list(set(self.field_names) - set(df.columns))
|
missing_field_names = list(set(self.field_names) - set(df.columns))
|
||||||
|
|
||||||
for missing in missing_field_names:
|
for missing in missing_field_names:
|
||||||
|
616
eland/series.py
616
eland/series.py
@ -11,19 +11,26 @@ without storing the dataset in local memory.
|
|||||||
|
|
||||||
Implementation Details
|
Implementation Details
|
||||||
----------------------
|
----------------------
|
||||||
Based on NDFrame which underpins eland.1DataFrame
|
Based on NDFrame which underpins eland.DataFrame
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import warnings
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
from pandas.io.common import _expand_user, _stringify_path
|
||||||
|
|
||||||
from eland import NDFrame
|
from eland import NDFrame
|
||||||
|
from eland.common import DEFAULT_NUM_ROWS_DISPLAYED
|
||||||
from eland.filter import NotFilter, Equal, Greater, Less, GreaterEqual, LessEqual, ScriptFilter, IsIn
|
from eland.filter import NotFilter, Equal, Greater, Less, GreaterEqual, LessEqual, ScriptFilter, IsIn
|
||||||
|
|
||||||
|
|
||||||
|
def _get_method_name():
|
||||||
|
return sys._getframe(1).f_code.co_name
|
||||||
|
|
||||||
|
|
||||||
class Series(NDFrame):
|
class Series(NDFrame):
|
||||||
"""
|
"""
|
||||||
pandas.Series like API that proxies into Elasticsearch index(es).
|
pandas.Series like API that proxies into Elasticsearch index(es).
|
||||||
@ -34,35 +41,35 @@ class Series(NDFrame):
|
|||||||
A reference to a Elasticsearch python client
|
A reference to a Elasticsearch python client
|
||||||
|
|
||||||
index_pattern : str
|
index_pattern : str
|
||||||
An Elasticsearch index pattern. This can contain wildcards (e.g. filebeat-*).
|
An Elasticsearch index pattern. This can contain wildcards (e.g. filebeat-\*).
|
||||||
|
|
||||||
index_field : str
|
index_field : str
|
||||||
The field to base the series on
|
The field to base the series on
|
||||||
|
|
||||||
See Also
|
|
||||||
--------
|
|
||||||
|
|
||||||
Examples
|
|
||||||
--------
|
|
||||||
|
|
||||||
import eland as ed
|
|
||||||
client = ed.Client(Elasticsearch())
|
|
||||||
s = ed.DataFrame(client, 'reviews', 'date')
|
|
||||||
df.head()
|
|
||||||
reviewerId vendorId rating date
|
|
||||||
0 0 0 5 2006-04-07 17:08
|
|
||||||
1 1 1 5 2006-05-04 12:16
|
|
||||||
2 2 2 4 2006-04-21 12:26
|
|
||||||
3 3 3 5 2006-04-18 15:48
|
|
||||||
4 3 4 5 2006-04-18 15:49
|
|
||||||
|
|
||||||
Notice that the types are based on Elasticsearch mappings
|
|
||||||
|
|
||||||
Notes
|
Notes
|
||||||
-----
|
-----
|
||||||
If the Elasticsearch index is deleted or index mappings are changed after this
|
If the Elasticsearch index is deleted or index mappings are changed after this
|
||||||
object is created, the object is not rebuilt and so inconsistencies can occur.
|
object is created, the object is not rebuilt and so inconsistencies can occur.
|
||||||
|
|
||||||
|
See Also
|
||||||
|
--------
|
||||||
|
:pandas_api_docs:`pandas.Series`
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> ed.Series(client='localhost', index_pattern='flights', name='Carrier')
|
||||||
|
0 Kibana Airlines
|
||||||
|
1 Logstash Airways
|
||||||
|
2 Logstash Airways
|
||||||
|
3 Kibana Airlines
|
||||||
|
4 Kibana Airlines
|
||||||
|
...
|
||||||
|
13054 Logstash Airways
|
||||||
|
13055 Logstash Airways
|
||||||
|
13056 Logstash Airways
|
||||||
|
13057 JetBeats
|
||||||
|
13058 JetBeats
|
||||||
|
Name: Carrier, Length: 13059, dtype: object
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
@ -94,6 +101,34 @@ class Series(NDFrame):
|
|||||||
"""
|
"""
|
||||||
return len(self.index) == 0
|
return len(self.index) == 0
|
||||||
|
|
||||||
|
@property
|
||||||
|
def shape(self):
|
||||||
|
"""
|
||||||
|
Return a tuple representing the dimensionality of the Series.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
shape: tuple
|
||||||
|
|
||||||
|
0. number of rows
|
||||||
|
1. number of columns
|
||||||
|
|
||||||
|
Notes
|
||||||
|
-----
|
||||||
|
- number of rows ``len(series)`` queries Elasticsearch
|
||||||
|
- number of columns == 1
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> df = ed.Series('localhost', 'ecommerce', name='total_quantity')
|
||||||
|
>>> df.shape
|
||||||
|
(4675, 1)
|
||||||
|
"""
|
||||||
|
num_rows = len(self)
|
||||||
|
num_columns = 1
|
||||||
|
|
||||||
|
return num_rows, num_columns
|
||||||
|
|
||||||
def _get_name(self):
|
def _get_name(self):
|
||||||
return self._query_compiler.columns[0]
|
return self._query_compiler.columns[0]
|
||||||
|
|
||||||
@ -118,7 +153,7 @@ class Series(NDFrame):
|
|||||||
|
|
||||||
See Also
|
See Also
|
||||||
--------
|
--------
|
||||||
:pandas_api_docs:pandas.Series.rename
|
:pandas_api_docs:`pandas.Series.rename`
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@ -200,12 +235,39 @@ class Series(NDFrame):
|
|||||||
|
|
||||||
return self._query_compiler.value_counts(es_size)
|
return self._query_compiler.value_counts(es_size)
|
||||||
|
|
||||||
|
# dtype not implemented for Series as causes query to fail
|
||||||
|
# in pandas.core.computation.ops.Term.type
|
||||||
|
|
||||||
# ----------------------------------------------------------------------
|
# ----------------------------------------------------------------------
|
||||||
# Rendering Methods
|
# Rendering Methods
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
num_rows = pd.get_option("max_rows") or 60
|
"""
|
||||||
|
Return a string representation for a particular Series.
|
||||||
|
"""
|
||||||
|
buf = StringIO()
|
||||||
|
|
||||||
return self.to_string(max_rows=num_rows)
|
# max_rows determines the maximum size of the pretty printed
|
||||||
|
# representation of the series (the pandas default for display.max_rows is 60).
|
||||||
|
# A series where len(series) > max_rows shows a truncated view of min_rows rows (pandas default 10).
|
||||||
|
max_rows = pd.get_option("display.max_rows")
|
||||||
|
min_rows = pd.get_option("display.min_rows")
|
||||||
|
|
||||||
|
if len(self) > max_rows:
|
||||||
|
max_rows = min_rows
|
||||||
|
|
||||||
|
show_dimensions = pd.get_option("display.show_dimensions")
|
||||||
|
|
||||||
|
self.to_string(
|
||||||
|
buf=buf,
|
||||||
|
name=self.name,
|
||||||
|
dtype=True,
|
||||||
|
min_rows=min_rows,
|
||||||
|
max_rows=max_rows,
|
||||||
|
length=show_dimensions,
|
||||||
|
)
|
||||||
|
result = buf.getvalue()
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
def to_string(
|
def to_string(
|
||||||
self,
|
self,
|
||||||
@ -217,33 +279,69 @@ class Series(NDFrame):
|
|||||||
length=False,
|
length=False,
|
||||||
dtype=False,
|
dtype=False,
|
||||||
name=False,
|
name=False,
|
||||||
max_rows=None):
|
max_rows=None,
|
||||||
|
min_rows=None,
|
||||||
|
):
|
||||||
|
# In pandas calling 'to_string' without max_rows set, will dump ALL rows - we avoid this
|
||||||
|
# by limiting rows by default.
|
||||||
|
num_rows = len(self) # avoid multiple calls
|
||||||
|
if num_rows <= DEFAULT_NUM_ROWS_DISPLAYED:
|
||||||
if max_rows is None:
|
if max_rows is None:
|
||||||
|
max_rows = num_rows
|
||||||
|
else:
|
||||||
|
max_rows = min(num_rows, max_rows)
|
||||||
|
elif max_rows is None:
|
||||||
warnings.warn("Series.to_string called without max_rows set "
|
warnings.warn("Series.to_string called without max_rows set "
|
||||||
"- this will return entire index results. "
|
"- this will return entire index results. "
|
||||||
"Setting max_rows=60, overwrite if different behaviour is required.")
|
"Setting max_rows={default}"
|
||||||
max_rows = 60
|
" overwrite if different behaviour is required."
|
||||||
|
.format(default=DEFAULT_NUM_ROWS_DISPLAYED),
|
||||||
|
UserWarning)
|
||||||
|
max_rows = DEFAULT_NUM_ROWS_DISPLAYED
|
||||||
|
|
||||||
|
# because of the way pandas handles max_rows=0, not having this throws an error
|
||||||
|
# see eland issue #56
|
||||||
|
if max_rows == 0:
|
||||||
|
max_rows = 1
|
||||||
|
|
||||||
# Create a slightly bigger dataframe than display
|
# Create a slightly bigger dataframe than display
|
||||||
temp_df = self._build_repr_df(max_rows + 1, None)
|
temp_series = self._build_repr(max_rows + 1)
|
||||||
if isinstance(temp_df, pd.DataFrame):
|
|
||||||
temp_df = temp_df[self.name]
|
if buf is not None:
|
||||||
temp_str = repr(temp_df)
|
_buf = _expand_user(_stringify_path(buf))
|
||||||
if self.name is not None:
|
|
||||||
name_str = "Name: {}, ".format(str(self.name))
|
|
||||||
else:
|
else:
|
||||||
name_str = ""
|
_buf = StringIO()
|
||||||
if len(self.index) > max_rows:
|
|
||||||
len_str = "Length: {}, ".format(len(self.index))
|
# Create repr of fake series without name, length, dtype summary
|
||||||
else:
|
temp_str = temp_series.to_string(buf=_buf,
|
||||||
len_str = ""
|
na_rep=na_rep,
|
||||||
dtype_str = "dtype: {}".format(temp_str.rsplit("dtype: ", 1)[-1])
|
float_format=float_format,
|
||||||
if len(self) == 0:
|
header=header,
|
||||||
return "Series([], {}{}".format(name_str, dtype_str)
|
index=index,
|
||||||
return temp_str.rsplit("\nName:", 1)[0] + "\n{}{}{}".format(
|
length=False,
|
||||||
name_str, len_str, dtype_str
|
dtype=False,
|
||||||
)
|
name=False,
|
||||||
|
max_rows=max_rows)
|
||||||
|
|
||||||
|
# Create the summary
|
||||||
|
footer = ""
|
||||||
|
if name and self.name is not None:
|
||||||
|
footer += "Name: {}".format(str(self.name))
|
||||||
|
if length and len(self) > max_rows:
|
||||||
|
if footer:
|
||||||
|
footer += ", "
|
||||||
|
footer += "Length: {}".format(len(self.index))
|
||||||
|
if dtype:
|
||||||
|
if footer:
|
||||||
|
footer += ", "
|
||||||
|
footer += "dtype: {}".format(temp_series.dtype)
|
||||||
|
|
||||||
|
if len(footer) > 0:
|
||||||
|
_buf.write("\n{}".format(footer))
|
||||||
|
|
||||||
|
if buf is None:
|
||||||
|
result = _buf.getvalue()
|
||||||
|
return result
|
||||||
|
|
||||||
def _to_pandas(self):
|
def _to_pandas(self):
|
||||||
return self._query_compiler.to_pandas()[self.name]
|
return self._query_compiler.to_pandas()[self.name]
|
||||||
@ -321,13 +419,16 @@ class Series(NDFrame):
|
|||||||
@property
|
@property
|
||||||
def ndim(self):
|
def ndim(self):
|
||||||
"""
|
"""
|
||||||
Returns 1 by definition of a Series1
|
Returns 1 by definition of a Series
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
int
|
int
|
||||||
By definition 1
|
By definition 1
|
||||||
|
|
||||||
|
See Also
|
||||||
|
--------
|
||||||
|
:pandas_api_docs:`pandas.Series.ndim`
|
||||||
"""
|
"""
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
@ -338,34 +439,317 @@ class Series(NDFrame):
|
|||||||
|
|
||||||
return buf.getvalue()
|
return buf.getvalue()
|
||||||
|
|
||||||
def __truediv__(self, right):
|
def __add__(self, right):
|
||||||
return self.truediv(right)
|
|
||||||
|
|
||||||
def truediv(self, right):
|
|
||||||
"""
|
"""
|
||||||
return a / b
|
Return addition of series and right, element-wise (binary operator add).
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
right: eland.Series
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
eland.Series
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> df = ed.DataFrame('localhost', 'ecommerce').head(5)
|
||||||
|
>>> df.taxful_total_price
|
||||||
|
0 36.98
|
||||||
|
1 53.98
|
||||||
|
2 199.98
|
||||||
|
3 174.98
|
||||||
|
4 80.98
|
||||||
|
Name: taxful_total_price, dtype: float64
|
||||||
|
>>> df.total_quantity
|
||||||
|
0 2
|
||||||
|
1 2
|
||||||
|
2 2
|
||||||
|
3 2
|
||||||
|
4 2
|
||||||
|
Name: total_quantity, dtype: int64
|
||||||
|
>>> df.taxful_total_price + df.total_quantity
|
||||||
|
0 38.980000
|
||||||
|
1 55.980000
|
||||||
|
2 201.979996
|
||||||
|
3 176.979996
|
||||||
|
4 82.980003
|
||||||
|
dtype: float64
|
||||||
|
"""
|
||||||
|
return self._numeric_op(right, _get_method_name())
|
||||||
|
|
||||||
|
def __truediv__(self, right):
|
||||||
|
"""
|
||||||
|
Return floating division of series and right, element-wise (binary operator truediv).
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
right: eland.Series
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
eland.Series
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> df = ed.DataFrame('localhost', 'ecommerce').head(5)
|
||||||
|
>>> df.taxful_total_price
|
||||||
|
0 36.98
|
||||||
|
1 53.98
|
||||||
|
2 199.98
|
||||||
|
3 174.98
|
||||||
|
4 80.98
|
||||||
|
Name: taxful_total_price, dtype: float64
|
||||||
|
>>> df.total_quantity
|
||||||
|
0 2
|
||||||
|
1 2
|
||||||
|
2 2
|
||||||
|
3 2
|
||||||
|
4 2
|
||||||
|
Name: total_quantity, dtype: int64
|
||||||
|
>>> df.taxful_total_price / df.total_quantity
|
||||||
|
0 18.490000
|
||||||
|
1 26.990000
|
||||||
|
2 99.989998
|
||||||
|
3 87.489998
|
||||||
|
4 40.490002
|
||||||
|
dtype: float64
|
||||||
|
"""
|
||||||
|
return self._numeric_op(right, _get_method_name())
|
||||||
|
|
||||||
|
def __floordiv__(self, right):
|
||||||
|
"""
|
||||||
|
Return integer division of series and right, element-wise (binary operator floordiv //).
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
right: eland.Series
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
eland.Series
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> df = ed.DataFrame('localhost', 'ecommerce').head(5)
|
||||||
|
>>> df.taxful_total_price
|
||||||
|
0 36.98
|
||||||
|
1 53.98
|
||||||
|
2 199.98
|
||||||
|
3 174.98
|
||||||
|
4 80.98
|
||||||
|
Name: taxful_total_price, dtype: float64
|
||||||
|
>>> df.total_quantity
|
||||||
|
0 2
|
||||||
|
1 2
|
||||||
|
2 2
|
||||||
|
3 2
|
||||||
|
4 2
|
||||||
|
Name: total_quantity, dtype: int64
|
||||||
|
>>> df.taxful_total_price // df.total_quantity
|
||||||
|
0 18.0
|
||||||
|
1 26.0
|
||||||
|
2 99.0
|
||||||
|
3 87.0
|
||||||
|
4 40.0
|
||||||
|
dtype: float64
|
||||||
|
"""
|
||||||
|
return self._numeric_op(right, _get_method_name())
|
||||||
|
|
||||||
|
def __mod__(self, right):
|
||||||
|
"""
|
||||||
|
Return modulo of series and right, element-wise (binary operator mod %).
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
right: eland.Series
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
eland.Series
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> df = ed.DataFrame('localhost', 'ecommerce').head(5)
|
||||||
|
>>> df.taxful_total_price
|
||||||
|
0 36.98
|
||||||
|
1 53.98
|
||||||
|
2 199.98
|
||||||
|
3 174.98
|
||||||
|
4 80.98
|
||||||
|
Name: taxful_total_price, dtype: float64
|
||||||
|
>>> df.total_quantity
|
||||||
|
0 2
|
||||||
|
1 2
|
||||||
|
2 2
|
||||||
|
3 2
|
||||||
|
4 2
|
||||||
|
Name: total_quantity, dtype: int64
|
||||||
|
>>> df.taxful_total_price % df.total_quantity
|
||||||
|
0 0.980000
|
||||||
|
1 1.980000
|
||||||
|
2 1.979996
|
||||||
|
3 0.979996
|
||||||
|
4 0.980003
|
||||||
|
dtype: float64
|
||||||
|
"""
|
||||||
|
return self._numeric_op(right, _get_method_name())
|
||||||
|
|
||||||
|
def __mul__(self, right):
|
||||||
|
"""
|
||||||
|
Return multiplication of series and right, element-wise (binary operator mul).
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
right: eland.Series
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
eland.Series
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> df = ed.DataFrame('localhost', 'ecommerce').head(5)
|
||||||
|
>>> df.taxful_total_price
|
||||||
|
0 36.98
|
||||||
|
1 53.98
|
||||||
|
2 199.98
|
||||||
|
3 174.98
|
||||||
|
4 80.98
|
||||||
|
Name: taxful_total_price, dtype: float64
|
||||||
|
>>> df.total_quantity
|
||||||
|
0 2
|
||||||
|
1 2
|
||||||
|
2 2
|
||||||
|
3 2
|
||||||
|
4 2
|
||||||
|
Name: total_quantity, dtype: int64
|
||||||
|
>>> df.taxful_total_price * df.total_quantity
|
||||||
|
0 73.959999
|
||||||
|
1 107.959999
|
||||||
|
2 399.959991
|
||||||
|
3 349.959991
|
||||||
|
4 161.960007
|
||||||
|
dtype: float64
|
||||||
|
"""
|
||||||
|
return self._numeric_op(right, _get_method_name())
|
||||||
|
|
||||||
|
def __sub__(self, right):
|
||||||
|
"""
|
||||||
|
Return subtraction of series and right, element-wise (binary operator sub).
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
right: eland.Series
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
eland.Series
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> df = ed.DataFrame('localhost', 'ecommerce').head(5)
|
||||||
|
>>> df.taxful_total_price
|
||||||
|
0 36.98
|
||||||
|
1 53.98
|
||||||
|
2 199.98
|
||||||
|
3 174.98
|
||||||
|
4 80.98
|
||||||
|
Name: taxful_total_price, dtype: float64
|
||||||
|
>>> df.total_quantity
|
||||||
|
0 2
|
||||||
|
1 2
|
||||||
|
2 2
|
||||||
|
3 2
|
||||||
|
4 2
|
||||||
|
Name: total_quantity, dtype: int64
|
||||||
|
>>> df.taxful_total_price - df.total_quantity
|
||||||
|
0 34.980000
|
||||||
|
1 51.980000
|
||||||
|
2 197.979996
|
||||||
|
3 172.979996
|
||||||
|
4 78.980003
|
||||||
|
dtype: float64
|
||||||
|
"""
|
||||||
|
return self._numeric_op(right, _get_method_name())
|
||||||
|
|
||||||
|
def __pow__(self, right):
|
||||||
|
"""
|
||||||
|
Return exponential power of series and right, element-wise (binary operator pow ``**``).
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
right: eland.Series
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
eland.Series
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> df = ed.DataFrame('localhost', 'ecommerce').head(5)
|
||||||
|
>>> df.taxful_total_price
|
||||||
|
0 36.98
|
||||||
|
1 53.98
|
||||||
|
2 199.98
|
||||||
|
3 174.98
|
||||||
|
4 80.98
|
||||||
|
Name: taxful_total_price, dtype: float64
|
||||||
|
>>> df.total_quantity
|
||||||
|
0 2
|
||||||
|
1 2
|
||||||
|
2 2
|
||||||
|
3 2
|
||||||
|
4 2
|
||||||
|
Name: total_quantity, dtype: int64
|
||||||
|
>>> df.taxful_total_price ** df.total_quantity
|
||||||
|
0 1367.520366
|
||||||
|
1 2913.840351
|
||||||
|
2 39991.998691
|
||||||
|
3 30617.998905
|
||||||
|
4 6557.760944
|
||||||
|
dtype: float64
|
||||||
|
"""
|
||||||
|
return self._numeric_op(right, _get_method_name())
|
||||||
|
|
||||||
|
add = __add__
|
||||||
|
div = __truediv__
|
||||||
|
divide = __truediv__
|
||||||
|
floordiv = __floordiv__
|
||||||
|
mod = __mod__
|
||||||
|
mul = __mul__
|
||||||
|
multiply = __mul__
|
||||||
|
pow = __pow__
|
||||||
|
sub = __sub__
|
||||||
|
subtract = __sub__
|
||||||
|
truediv = __truediv__
|
||||||
|
|
||||||
|
def _numeric_op(self, right, method_name):
|
||||||
|
"""
|
||||||
|
return a op b
|
||||||
|
|
||||||
a & b == Series
|
a & b == Series
|
||||||
a & b must share same eland.Client, index_pattern and index_field
|
a & b must share same eland.Client, index_pattern and index_field
|
||||||
|
a == Series, b == numeric
|
||||||
"""
|
"""
|
||||||
if isinstance(right, Series):
|
if isinstance(right, Series):
|
||||||
# Check compatibility
|
# Check compatibility
|
||||||
self._query_compiler.check_arithmetics(right._query_compiler)
|
self._query_compiler.check_arithmetics(right._query_compiler)
|
||||||
|
|
||||||
new_field_name = "{0}_{1}_{2}".format(self.name, "truediv", right.name)
|
new_field_name = "{0}_{1}_{2}".format(self.name, method_name, right.name)
|
||||||
|
|
||||||
# Compatible, so create new Series
|
# Compatible, so create new Series
|
||||||
series = Series(query_compiler=self._query_compiler.arithmetic_op_fields(
|
series = Series(query_compiler=self._query_compiler.arithmetic_op_fields(
|
||||||
new_field_name, 'truediv', self.name, right.name))
|
new_field_name, method_name, self.name, right.name))
|
||||||
series.name = None
|
series.name = None
|
||||||
|
|
||||||
return series
|
return series
|
||||||
elif isinstance(right, (int, float)): # TODO extend to numpy types
|
elif isinstance(right, (int, float)): # TODO extend to numpy types
|
||||||
new_field_name = "{0}_{1}_{2}".format(self.name, "truediv", str(right).replace('.','_'))
|
new_field_name = "{0}_{1}_{2}".format(self.name, method_name, str(right).replace('.', '_'))
|
||||||
|
|
||||||
# Compatible, so create new Series
|
# Compatible, so create new Series
|
||||||
series = Series(query_compiler=self._query_compiler.arithmetic_op_fields(
|
series = Series(query_compiler=self._query_compiler.arithmetic_op_fields(
|
||||||
new_field_name, 'truediv', self.name, float(right))) # force rhs to float
|
new_field_name, method_name, self.name, float(right))) # force rhs to float
|
||||||
|
|
||||||
# name of Series remains original name
|
# name of Series remains original name
|
||||||
series.name = self.name
|
series.name = self.name
|
||||||
@ -374,5 +758,123 @@ class Series(NDFrame):
|
|||||||
else:
|
else:
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
"Can only perform arithmetic operation on selected types "
|
"Can only perform arithmetic operation on selected types "
|
||||||
"{0} != {1}".format(type(self), type(right))
|
"{0} != {1} for {2}".format(type(self), type(right), method_name)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def max(self):
|
||||||
|
"""
|
||||||
|
Return the maximum of the Series values
|
||||||
|
|
||||||
|
TODO - implement remainder of pandas arguments, currently non-numerics are not supported
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
float
|
||||||
|
max value
|
||||||
|
|
||||||
|
See Also
|
||||||
|
--------
|
||||||
|
:pandas_api_docs:`pandas.Series.max`
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> s = ed.Series('localhost', 'flights', name='AvgTicketPrice')
|
||||||
|
>>> int(s.max())
|
||||||
|
1199
|
||||||
|
"""
|
||||||
|
results = super().max()
|
||||||
|
return results.squeeze()
|
||||||
|
|
||||||
|
def mean(self):
|
||||||
|
"""
|
||||||
|
Return the mean of the Series values
|
||||||
|
|
||||||
|
TODO - implement remainder of pandas arguments, currently non-numerics are not supported
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
float
|
||||||
|
mean value
|
||||||
|
|
||||||
|
See Also
|
||||||
|
--------
|
||||||
|
:pandas_api_docs:`pandas.Series.mean`
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> s = ed.Series('localhost', 'flights', name='AvgTicketPrice')
|
||||||
|
>>> int(s.mean())
|
||||||
|
628
|
||||||
|
"""
|
||||||
|
results = super().mean()
|
||||||
|
return results.squeeze()
|
||||||
|
|
||||||
|
def min(self):
|
||||||
|
"""
|
||||||
|
Return the minimum of the Series values
|
||||||
|
|
||||||
|
TODO - implement remainder of pandas arguments, currently non-numerics are not supported
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
float
|
||||||
|
min value
|
||||||
|
|
||||||
|
See Also
|
||||||
|
--------
|
||||||
|
:pandas_api_docs:`pandas.Series.min`
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> s = ed.Series('localhost', 'flights', name='AvgTicketPrice')
|
||||||
|
>>> int(s.min())
|
||||||
|
100
|
||||||
|
"""
|
||||||
|
results = super().min()
|
||||||
|
return results.squeeze()
|
||||||
|
|
||||||
|
def sum(self):
|
||||||
|
"""
|
||||||
|
Return the sum of the Series values
|
||||||
|
|
||||||
|
TODO - implement remainder of pandas arguments, currently non-numerics are not supported
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
float
|
||||||
|
sum of the values
|
||||||
|
|
||||||
|
See Also
|
||||||
|
--------
|
||||||
|
:pandas_api_docs:`pandas.Series.sum`
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> s = ed.Series('localhost', 'flights', name='AvgTicketPrice')
|
||||||
|
>>> int(s.sum())
|
||||||
|
8204364
|
||||||
|
"""
|
||||||
|
results = super().sum()
|
||||||
|
return results.squeeze()
|
||||||
|
|
||||||
|
def nunique(self):
|
||||||
|
"""
|
||||||
|
Return the number of unique values in the Series
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
int
|
||||||
|
number of unique values
|
||||||
|
|
||||||
|
See Also
|
||||||
|
--------
|
||||||
|
:pandas_api_docs:`pandas.Series.nunique`
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> s = ed.Series('localhost', 'flights', name='Carrier')
|
||||||
|
>>> s.nunique()
|
||||||
|
4
|
||||||
|
"""
|
||||||
|
results = super().nunique()
|
||||||
|
return results.squeeze()
|
||||||
|
@ -279,10 +279,10 @@ ECOMMERCE_MAPPING = {"mappings": {
|
|||||||
"type": "keyword"
|
"type": "keyword"
|
||||||
},
|
},
|
||||||
"taxful_total_price": {
|
"taxful_total_price": {
|
||||||
"type": "half_float"
|
"type": "float"
|
||||||
},
|
},
|
||||||
"taxless_total_price": {
|
"taxless_total_price": {
|
||||||
"type": "half_float"
|
"type": "float"
|
||||||
},
|
},
|
||||||
"total_quantity": {
|
"total_quantity": {
|
||||||
"type": "integer"
|
"type": "integer"
|
||||||
|
@ -4,6 +4,8 @@ from eland.tests.common import TestData, assert_pandas_eland_series_equal
|
|||||||
from pandas.util.testing import assert_series_equal
|
from pandas.util.testing import assert_series_equal
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
class TestSeriesArithmetics(TestData):
|
class TestSeriesArithmetics(TestData):
|
||||||
|
|
||||||
@ -15,29 +17,35 @@ class TestSeriesArithmetics(TestData):
|
|||||||
with pytest.raises(TypeError):
|
with pytest.raises(TypeError):
|
||||||
ed_df['total_quantity'] / pd_df['taxful_total_price']
|
ed_df['total_quantity'] / pd_df['taxful_total_price']
|
||||||
|
|
||||||
def test_ecommerce_series_div(self):
|
def test_ecommerce_series_basic_arithmetics(self):
|
||||||
pd_df = self.pd_ecommerce()
|
pd_df = self.pd_ecommerce().head(100)
|
||||||
ed_df = self.ed_ecommerce()
|
ed_df = self.ed_ecommerce().head(100)
|
||||||
|
|
||||||
pd_avg_price = pd_df['total_quantity'] / pd_df['taxful_total_price']
|
ops = ['__add__',
|
||||||
ed_avg_price = ed_df['total_quantity'] / ed_df['taxful_total_price']
|
'__truediv__',
|
||||||
|
'__floordiv__',
|
||||||
|
'__pow__',
|
||||||
|
'__mod__',
|
||||||
|
'__mul__',
|
||||||
|
'__sub__',
|
||||||
|
'add',
|
||||||
|
'truediv',
|
||||||
|
'floordiv',
|
||||||
|
'pow',
|
||||||
|
'mod',
|
||||||
|
'mul',
|
||||||
|
'sub']
|
||||||
|
|
||||||
assert_pandas_eland_series_equal(pd_avg_price, ed_avg_price, check_less_precise=True)
|
for op in ops:
|
||||||
|
pd_series = getattr(pd_df['taxful_total_price'], op)(pd_df['total_quantity'])
|
||||||
|
ed_series = getattr(ed_df['taxful_total_price'], op)(ed_df['total_quantity'])
|
||||||
|
assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True)
|
||||||
|
|
||||||
def test_ecommerce_series_div_float(self):
|
pd_series = getattr(pd_df['taxful_total_price'], op)(10.56)
|
||||||
pd_df = self.pd_ecommerce()
|
ed_series = getattr(ed_df['taxful_total_price'], op)(10.56)
|
||||||
ed_df = self.ed_ecommerce()
|
assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True)
|
||||||
|
|
||||||
pd_avg_price = pd_df['total_quantity'] / 10.0
|
pd_series = getattr(pd_df['taxful_total_price'], op)(int(8))
|
||||||
ed_avg_price = ed_df['total_quantity'] / 10.0
|
ed_series = getattr(ed_df['taxful_total_price'], op)(int(8))
|
||||||
|
assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True)
|
||||||
|
|
||||||
assert_pandas_eland_series_equal(pd_avg_price, ed_avg_price, check_less_precise=True)
|
|
||||||
|
|
||||||
def test_ecommerce_series_div_int(self):
|
|
||||||
pd_df = self.pd_ecommerce()
|
|
||||||
ed_df = self.ed_ecommerce()
|
|
||||||
|
|
||||||
pd_avg_price = pd_df['total_quantity'] / int(10)
|
|
||||||
ed_avg_price = ed_df['total_quantity'] / int(10)
|
|
||||||
|
|
||||||
assert_pandas_eland_series_equal(pd_avg_price, ed_avg_price, check_less_precise=True)
|
|
||||||
|
17
eland/tests/series/test_info_es_pytest.py
Normal file
17
eland/tests/series/test_info_es_pytest.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# File called _pytest for PyCharm compatibility
|
||||||
|
|
||||||
|
from pandas.util.testing import assert_almost_equal
|
||||||
|
|
||||||
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
|
import eland as ed
|
||||||
|
|
||||||
|
|
||||||
|
class TestSeriesInfoEs(TestData):
|
||||||
|
|
||||||
|
def test_flights_info_es(self):
|
||||||
|
ed_flights = self.ed_flights()['AvgTicketPrice']
|
||||||
|
|
||||||
|
# No assertion, just test it can be called
|
||||||
|
info_es = ed_flights.info_es()
|
||||||
|
|
44
eland/tests/series/test_metrics_pytest.py
Normal file
44
eland/tests/series/test_metrics_pytest.py
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
# File called _pytest for PyCharm compatibility
|
||||||
|
|
||||||
|
from pandas.util.testing import assert_almost_equal
|
||||||
|
|
||||||
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
|
import eland as ed
|
||||||
|
|
||||||
|
|
||||||
|
class TestSeriesMetrics(TestData):
|
||||||
|
|
||||||
|
funcs = ['max', 'min', 'mean', 'sum']
|
||||||
|
|
||||||
|
def test_flights_metrics(self):
|
||||||
|
pd_flights = self.pd_flights()['AvgTicketPrice']
|
||||||
|
ed_flights = self.ed_flights()['AvgTicketPrice']
|
||||||
|
|
||||||
|
for func in self.funcs:
|
||||||
|
pd_metric = getattr(pd_flights, func)()
|
||||||
|
ed_metric = getattr(ed_flights, func)()
|
||||||
|
assert_almost_equal(pd_metric, ed_metric, check_less_precise=True)
|
||||||
|
|
||||||
|
def test_ecommerce_selected_non_numeric_source_fields(self):
|
||||||
|
# None of these are numeric
|
||||||
|
column = 'category'
|
||||||
|
|
||||||
|
ed_ecommerce = self.ed_ecommerce()[column]
|
||||||
|
|
||||||
|
for func in self.funcs:
|
||||||
|
ed_metric = getattr(ed_ecommerce, func)()
|
||||||
|
assert ed_metric.empty
|
||||||
|
|
||||||
|
|
||||||
|
def test_ecommerce_selected_all_numeric_source_fields(self):
|
||||||
|
# All of these are numeric
|
||||||
|
columns = ['total_quantity', 'taxful_total_price', 'taxless_total_price']
|
||||||
|
|
||||||
|
for column in columns:
|
||||||
|
pd_ecommerce = self.pd_ecommerce()[column]
|
||||||
|
ed_ecommerce = self.ed_ecommerce()[column]
|
||||||
|
|
||||||
|
for func in self.funcs:
|
||||||
|
assert_almost_equal(getattr(pd_ecommerce, func)(), getattr(ed_ecommerce, func)(),
|
||||||
|
check_less_precise=True)
|
@ -1,13 +1,14 @@
|
|||||||
# File called _pytest for PyCharm compatibility
|
# File called _pytest for PyCharm compatibility
|
||||||
import eland as ed
|
import eland as ed
|
||||||
|
import pandas as pd
|
||||||
from eland.tests import ELASTICSEARCH_HOST
|
from eland.tests import ELASTICSEARCH_HOST
|
||||||
from eland.tests import FLIGHTS_INDEX_NAME
|
from eland.tests import FLIGHTS_INDEX_NAME, ECOMMERCE_INDEX_NAME
|
||||||
from eland.tests.common import TestData
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
|
|
||||||
class TestSeriesRepr(TestData):
|
class TestSeriesRepr(TestData):
|
||||||
|
|
||||||
def test_repr(self):
|
def test_repr_flights_carrier(self):
|
||||||
pd_s = self.pd_flights()['Carrier']
|
pd_s = self.pd_flights()['Carrier']
|
||||||
ed_s = ed.Series(ELASTICSEARCH_HOST, FLIGHTS_INDEX_NAME, 'Carrier')
|
ed_s = ed.Series(ELASTICSEARCH_HOST, FLIGHTS_INDEX_NAME, 'Carrier')
|
||||||
|
|
||||||
@ -15,3 +16,12 @@ class TestSeriesRepr(TestData):
|
|||||||
ed_repr = repr(ed_s)
|
ed_repr = repr(ed_s)
|
||||||
|
|
||||||
assert pd_repr == ed_repr
|
assert pd_repr == ed_repr
|
||||||
|
|
||||||
|
def test_repr_flights_carrier_5(self):
|
||||||
|
pd_s = self.pd_flights()['Carrier'].head(5)
|
||||||
|
ed_s = ed.Series(ELASTICSEARCH_HOST, FLIGHTS_INDEX_NAME, 'Carrier').head(5)
|
||||||
|
|
||||||
|
pd_repr = repr(pd_s)
|
||||||
|
ed_repr = repr(ed_s)
|
||||||
|
|
||||||
|
assert pd_repr == ed_repr
|
||||||
|
Loading…
x
Reference in New Issue
Block a user