mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Fixing rename and truediv issues
tests pass TODO - implement additional arithmetic ops
This commit is contained in:
parent
c12bf9357b
commit
5d119215f8
@ -389,10 +389,10 @@ class DataFrame(NDFrame):
|
|||||||
<BLANKLINE>
|
<BLANKLINE>
|
||||||
[27 rows x 5 columns]
|
[27 rows x 5 columns]
|
||||||
Operations:
|
Operations:
|
||||||
tasks: [('boolean_filter', {'bool': {'must': [{'term': {'OriginAirportID': 'AMS'}}, {'range': {'FlightDelayMin': {'gt': 60}}}]}}), ('columns', ['timestamp', 'OriginAirportID', 'DestAirportID', 'FlightDelayMin']), ('tail', ('_doc', 5))]
|
tasks: [('boolean_filter', {'bool': {'must': [{'term': {'OriginAirportID': 'AMS'}}, {'range': {'FlightDelayMin': {'gt': 60}}}]}}), ('field_names', ['timestamp', 'OriginAirportID', 'DestAirportID', 'FlightDelayMin']), ('tail', ('_doc', 5))]
|
||||||
size: 5
|
size: 5
|
||||||
sort_params: _doc:desc
|
sort_params: _doc:desc
|
||||||
columns: ['timestamp', 'OriginAirportID', 'DestAirportID', 'FlightDelayMin']
|
field_names: ['timestamp', 'OriginAirportID', 'DestAirportID', 'FlightDelayMin']
|
||||||
post_processing: ['sort_index']
|
post_processing: ['sort_index']
|
||||||
<BLANKLINE>
|
<BLANKLINE>
|
||||||
"""
|
"""
|
||||||
|
@ -881,31 +881,58 @@ class Operations:
|
|||||||
left_field = item[1][1][1][0]
|
left_field = item[1][1][1][0]
|
||||||
right_field = item[1][1][1][1]
|
right_field = item[1][1][1][1]
|
||||||
|
|
||||||
"""
|
if isinstance(right_field, str):
|
||||||
(if op_name = 'truediv')
|
"""
|
||||||
|
(if op_name = 'truediv')
|
||||||
|
|
||||||
"script_fields": {
|
"script_fields": {
|
||||||
"field_name": {
|
"field_name": {
|
||||||
"script": {
|
"script": {
|
||||||
"source": "doc[left_field].value / doc[right_field].value"
|
"source": "doc[left_field].value / doc[right_field].value"
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
if op_name == 'truediv':
|
||||||
|
op = '/'
|
||||||
|
else:
|
||||||
|
raise NotImplementedError("Not implemented operation '{0}'".format(op_name))
|
||||||
|
|
||||||
|
source = "doc['{0}'].value {1} doc['{2}'].value".format(left_field, op, right_field)
|
||||||
|
|
||||||
|
if query_params['query_script_fields'] is None:
|
||||||
|
query_params['query_script_fields'] = {}
|
||||||
|
query_params['query_script_fields'][field_name] = {
|
||||||
|
'script': {
|
||||||
|
'source': source
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
"""
|
|
||||||
if op_name == 'truediv':
|
|
||||||
op = '/'
|
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError("Not implemented operation '{0}'".format(op_name))
|
"""
|
||||||
|
(if op_name = 'truediv')
|
||||||
|
|
||||||
source = "doc['{0}'].value {1} doc['{2}'].value".format(left_field, op, right_field)
|
"script_fields": {
|
||||||
|
"field_name": {
|
||||||
if query_params['query_script_fields'] is None:
|
"script": {
|
||||||
query_params['query_script_fields'] = {}
|
"source": "doc[left_field].value / right_field"
|
||||||
query_params['query_script_fields'][field_name] = {
|
}
|
||||||
'script': {
|
}
|
||||||
'source': source
|
}
|
||||||
|
"""
|
||||||
|
if op_name == 'truediv':
|
||||||
|
op = '/'
|
||||||
|
else:
|
||||||
|
raise NotImplementedError("Not implemented operation '{0}'".format(op_name))
|
||||||
|
|
||||||
|
source = "doc['{0}'].value {1} {2}".format(left_field, op, right_field)
|
||||||
|
|
||||||
|
if query_params['query_script_fields'] is None:
|
||||||
|
query_params['query_script_fields'] = {}
|
||||||
|
query_params['query_script_fields'][field_name] = {
|
||||||
|
'script': {
|
||||||
|
'source': source
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return query_params, post_processing
|
return query_params, post_processing
|
||||||
|
|
||||||
|
@ -96,6 +96,7 @@ class ElandQueryCompiler:
|
|||||||
self._operations.set_field_names(columns)
|
self._operations.set_field_names(columns)
|
||||||
|
|
||||||
columns = property(_get_columns, _set_columns)
|
columns = property(_get_columns, _set_columns)
|
||||||
|
|
||||||
index = property(_get_index)
|
index = property(_get_index)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -241,9 +242,9 @@ class ElandQueryCompiler:
|
|||||||
# _source may not contain all columns in the mapping
|
# _source may not contain all columns in the mapping
|
||||||
# therefore, fill in missing columns
|
# therefore, fill in missing columns
|
||||||
# (note this returns self.columns NOT IN df.columns)
|
# (note this returns self.columns NOT IN df.columns)
|
||||||
missing_columns = list(set(self.columns) - set(df.columns))
|
missing_field_names = list(set(self.field_names) - set(df.columns))
|
||||||
|
|
||||||
for missing in missing_columns:
|
for missing in missing_field_names:
|
||||||
is_source_field, pd_dtype = self._mappings.source_field_pd_dtype(missing)
|
is_source_field, pd_dtype = self._mappings.source_field_pd_dtype(missing)
|
||||||
df[missing] = pd.Series(dtype=pd_dtype)
|
df[missing] = pd.Series(dtype=pd_dtype)
|
||||||
|
|
||||||
@ -252,7 +253,8 @@ class ElandQueryCompiler:
|
|||||||
df.rename(columns=self._name_mapper.display_names_mapper(), inplace=True)
|
df.rename(columns=self._name_mapper.display_names_mapper(), inplace=True)
|
||||||
|
|
||||||
# Sort columns in mapping order
|
# Sort columns in mapping order
|
||||||
df = df[self.columns]
|
if len(self.columns) > 1:
|
||||||
|
df = df[self.columns]
|
||||||
|
|
||||||
return partial_result, df
|
return partial_result, df
|
||||||
|
|
||||||
@ -343,12 +345,14 @@ class ElandQueryCompiler:
|
|||||||
index_field=self._index.index_field, operations=self._operations.copy(),
|
index_field=self._index.index_field, operations=self._operations.copy(),
|
||||||
name_mapper=self._name_mapper.copy())
|
name_mapper=self._name_mapper.copy())
|
||||||
|
|
||||||
def rename(self, renames):
|
def rename(self, renames, inplace=False):
|
||||||
result = self.copy()
|
if inplace:
|
||||||
|
self._name_mapper.rename_display_name(renames)
|
||||||
result._name_mapper.rename_display_name(renames)
|
return self
|
||||||
|
else:
|
||||||
return result
|
result = self.copy()
|
||||||
|
result._name_mapper.rename_display_name(renames)
|
||||||
|
return result
|
||||||
|
|
||||||
def head(self, n):
|
def head(self, n):
|
||||||
result = self.copy()
|
result = self.copy()
|
||||||
@ -503,10 +507,10 @@ class ElandQueryCompiler:
|
|||||||
"{0} != {1}".format(self._index_pattern, right._index_pattern)
|
"{0} != {1}".format(self._index_pattern, right._index_pattern)
|
||||||
)
|
)
|
||||||
|
|
||||||
def arithmetic_op_fields(self, field_name, op, left_field, right_field):
|
def arithmetic_op_fields(self, new_field_name, op, left_field, right_field):
|
||||||
result = self.copy()
|
result = self.copy()
|
||||||
|
|
||||||
result._operations.arithmetic_op_fields(field_name, op, left_field, right_field)
|
result._operations.arithmetic_op_fields(new_field_name, op, left_field, right_field)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@ -547,10 +551,10 @@ class ElandQueryCompiler:
|
|||||||
self._field_to_display_names[field_name] = new_display_name
|
self._field_to_display_names[field_name] = new_display_name
|
||||||
|
|
||||||
def field_names_to_list(self):
|
def field_names_to_list(self):
|
||||||
return self._field_to_display_names.keys()
|
return sorted(list(self._field_to_display_names.keys()))
|
||||||
|
|
||||||
def display_names_to_list(self):
|
def display_names_to_list(self):
|
||||||
return self._display_to_field_names.keys()
|
return sorted(list(self._display_to_field_names.keys()))
|
||||||
|
|
||||||
# Return mapper values as dict
|
# Return mapper values as dict
|
||||||
def display_names_mapper(self):
|
def display_names_mapper(self):
|
||||||
@ -595,7 +599,7 @@ class ElandQueryCompiler:
|
|||||||
|
|
||||||
def copy(self):
|
def copy(self):
|
||||||
return self.__constructor__(
|
return self.__constructor__(
|
||||||
field_to_display_names=self._field_to_display_names,
|
field_to_display_names=self._field_to_display_names.copy(),
|
||||||
display_to_field_names = self._display_to_field_names
|
display_to_field_names = self._display_to_field_names.copy()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -18,6 +18,7 @@ Based on NDFrame which underpins eland.1DataFrame
|
|||||||
from io import StringIO
|
from io import StringIO
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
from eland import NDFrame
|
from eland import NDFrame
|
||||||
from eland.filter import NotFilter, Equal, Greater, Less, GreaterEqual, LessEqual, ScriptFilter, IsIn
|
from eland.filter import NotFilter, Equal, Greater, Less, GreaterEqual, LessEqual, ScriptFilter, IsIn
|
||||||
@ -96,19 +97,58 @@ class Series(NDFrame):
|
|||||||
def _get_name(self):
|
def _get_name(self):
|
||||||
return self._query_compiler.columns[0]
|
return self._query_compiler.columns[0]
|
||||||
|
|
||||||
name = property(_get_name)
|
def _set_name(self, name):
|
||||||
|
self._query_compiler.rename({self.name: name}, inplace=True)
|
||||||
|
|
||||||
|
name = property(_get_name, _set_name)
|
||||||
|
|
||||||
def rename(self, new_name):
|
def rename(self, new_name):
|
||||||
"""
|
"""
|
||||||
ONLY COLUMN rename supported
|
Rename name of series. Only column rename is supported. This does not change the underlying
|
||||||
|
Elasticsearch index, but adds a soft link from the new name (column) to the Elasticsearch field name
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
new_name
|
new_name: str
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
|
eland.Series
|
||||||
|
eland.Series with new name.
|
||||||
|
|
||||||
|
See Also
|
||||||
|
--------
|
||||||
|
:pandas_api_docs:pandas.Series.rename
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> df = ed.DataFrame('localhost', 'flights')
|
||||||
|
>>> df.Carrier
|
||||||
|
0 Kibana Airlines
|
||||||
|
1 Logstash Airways
|
||||||
|
2 Logstash Airways
|
||||||
|
3 Kibana Airlines
|
||||||
|
4 Kibana Airlines
|
||||||
|
...
|
||||||
|
13054 Logstash Airways
|
||||||
|
13055 Logstash Airways
|
||||||
|
13056 Logstash Airways
|
||||||
|
13057 JetBeats
|
||||||
|
13058 JetBeats
|
||||||
|
Name: Carrier, Length: 13059, dtype: object
|
||||||
|
>>> df.Carrier.rename('Airline')
|
||||||
|
0 Kibana Airlines
|
||||||
|
1 Logstash Airways
|
||||||
|
2 Logstash Airways
|
||||||
|
3 Kibana Airlines
|
||||||
|
4 Kibana Airlines
|
||||||
|
...
|
||||||
|
13054 Logstash Airways
|
||||||
|
13055 Logstash Airways
|
||||||
|
13056 Logstash Airways
|
||||||
|
13057 JetBeats
|
||||||
|
13058 JetBeats
|
||||||
|
Name: Airline, Length: 13059, dtype: object
|
||||||
"""
|
"""
|
||||||
return Series(query_compiler=self._query_compiler.rename({self.name: new_name}))
|
return Series(query_compiler=self._query_compiler.rename({self.name: new_name}))
|
||||||
|
|
||||||
@ -312,11 +352,25 @@ class Series(NDFrame):
|
|||||||
# Check compatibility
|
# Check compatibility
|
||||||
self._query_compiler.check_arithmetics(right._query_compiler)
|
self._query_compiler.check_arithmetics(right._query_compiler)
|
||||||
|
|
||||||
field_name = "{0}_{1}_{2}".format(self.name, "truediv", right.name)
|
new_field_name = "{0}_{1}_{2}".format(self.name, "truediv", right.name)
|
||||||
|
|
||||||
# Compatible, so create new Series
|
# Compatible, so create new Series
|
||||||
return Series(query_compiler=self._query_compiler.arithmetic_op_fields(
|
series = Series(query_compiler=self._query_compiler.arithmetic_op_fields(
|
||||||
field_name, 'truediv', self.name, right.name))
|
new_field_name, 'truediv', self.name, right.name))
|
||||||
|
series.name = None
|
||||||
|
|
||||||
|
return series
|
||||||
|
elif isinstance(right, (int, float)): # TODO extend to numpy types
|
||||||
|
new_field_name = "{0}_{1}_{2}".format(self.name, "truediv", str(right).replace('.','_'))
|
||||||
|
|
||||||
|
# Compatible, so create new Series
|
||||||
|
series = Series(query_compiler=self._query_compiler.arithmetic_op_fields(
|
||||||
|
new_field_name, 'truediv', self.name, float(right))) # force rhs to float
|
||||||
|
|
||||||
|
# name of Series remains original name
|
||||||
|
series.name = self.name
|
||||||
|
|
||||||
|
return series
|
||||||
else:
|
else:
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
"Can only perform arithmetic operation on selected types "
|
"Can only perform arithmetic operation on selected types "
|
||||||
|
@ -53,23 +53,23 @@ class TestQueryCompilerRename(TestData):
|
|||||||
update_A = {'a' : 'A'}
|
update_A = {'a' : 'A'}
|
||||||
mapper.rename_display_name(update_A)
|
mapper.rename_display_name(update_A)
|
||||||
|
|
||||||
assert display_names == mapper.display_names(columns)
|
assert display_names == mapper.field_to_display_names(columns)
|
||||||
|
|
||||||
# Invalid update
|
# Invalid update
|
||||||
display_names = ['A', 'b', 'c', 'd']
|
display_names = ['A', 'b', 'c', 'd']
|
||||||
update_ZZ = {'a' : 'ZZ'}
|
update_ZZ = {'a' : 'ZZ'}
|
||||||
mapper.rename_display_name(update_ZZ)
|
mapper.rename_display_name(update_ZZ)
|
||||||
|
|
||||||
assert display_names == mapper.display_names(columns)
|
assert display_names == mapper.field_to_display_names(columns)
|
||||||
|
|
||||||
display_names = ['AA', 'b', 'c', 'd']
|
display_names = ['AA', 'b', 'c', 'd']
|
||||||
update_AA = {'A' : 'AA'} # already renamed to 'A'
|
update_AA = {'A' : 'AA'} # already renamed to 'A'
|
||||||
mapper.rename_display_name(update_AA)
|
mapper.rename_display_name(update_AA)
|
||||||
|
|
||||||
assert display_names == mapper.display_names(columns)
|
assert display_names == mapper.field_to_display_names(columns)
|
||||||
|
|
||||||
display_names = ['AA', 'b', 'C', 'd']
|
display_names = ['AA', 'b', 'C', 'd']
|
||||||
update_AA_C = {'a' : 'AA', 'c' : 'C'} # 'a' rename ignored
|
update_AA_C = {'a' : 'AA', 'c' : 'C'} # 'a' rename ignored
|
||||||
mapper.rename_display_name(update_AA_C)
|
mapper.rename_display_name(update_AA_C)
|
||||||
|
|
||||||
assert display_names == mapper.display_names(columns)
|
assert display_names == mapper.field_to_display_names(columns)
|
||||||
|
@ -20,10 +20,7 @@ class TestSeriesArithmetics(TestData):
|
|||||||
ed_df = self.ed_ecommerce()
|
ed_df = self.ed_ecommerce()
|
||||||
|
|
||||||
pd_avg_price = pd_df['total_quantity'] / pd_df['taxful_total_price']
|
pd_avg_price = pd_df['total_quantity'] / pd_df['taxful_total_price']
|
||||||
print(pd_avg_price) # this has None as name
|
|
||||||
|
|
||||||
ed_avg_price = ed_df['total_quantity'] / ed_df['taxful_total_price']
|
ed_avg_price = ed_df['total_quantity'] / ed_df['taxful_total_price']
|
||||||
print(ed_avg_price)
|
|
||||||
|
|
||||||
assert_pandas_eland_series_equal(pd_avg_price, ed_avg_price, check_less_precise=True)
|
assert_pandas_eland_series_equal(pd_avg_price, ed_avg_price, check_less_precise=True)
|
||||||
|
|
||||||
@ -32,19 +29,15 @@ class TestSeriesArithmetics(TestData):
|
|||||||
ed_df = self.ed_ecommerce()
|
ed_df = self.ed_ecommerce()
|
||||||
|
|
||||||
pd_avg_price = pd_df['total_quantity'] / 10.0
|
pd_avg_price = pd_df['total_quantity'] / 10.0
|
||||||
print(pd_avg_price)
|
|
||||||
|
|
||||||
ed_avg_price = ed_df['total_quantity'] / 10.0
|
ed_avg_price = ed_df['total_quantity'] / 10.0
|
||||||
print(ed_avg_price)
|
|
||||||
|
|
||||||
def test_ecommerce_series_div_other(self):
|
assert_pandas_eland_series_equal(pd_avg_price, ed_avg_price, check_less_precise=True)
|
||||||
|
|
||||||
|
def test_ecommerce_series_div_int(self):
|
||||||
|
pd_df = self.pd_ecommerce()
|
||||||
ed_df = self.ed_ecommerce()
|
ed_df = self.ed_ecommerce()
|
||||||
|
|
||||||
ed_s1 = ed_df.total_quantity
|
pd_avg_price = pd_df['total_quantity'] / int(10)
|
||||||
ed_s2 = ed_df.taxful_total_price
|
ed_avg_price = ed_df['total_quantity'] / int(10)
|
||||||
|
|
||||||
print(ed_s1)
|
assert_pandas_eland_series_equal(pd_avg_price, ed_avg_price, check_less_precise=True)
|
||||||
print(ed_s2)
|
|
||||||
|
|
||||||
print(ed_s1)
|
|
||||||
print(ed_s2)
|
|
||||||
|
32
eland/tests/series/test_name_pytest.py
Normal file
32
eland/tests/series/test_name_pytest.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
# File called _pytest for PyCharm compatibility
|
||||||
|
import eland as ed
|
||||||
|
from eland.tests import ELASTICSEARCH_HOST
|
||||||
|
from eland.tests import FLIGHTS_INDEX_NAME
|
||||||
|
from eland.tests.common import TestData
|
||||||
|
from eland.tests.common import assert_pandas_eland_series_equal
|
||||||
|
|
||||||
|
|
||||||
|
class TestSeriesName(TestData):
|
||||||
|
|
||||||
|
def test_name(self):
|
||||||
|
# deep copy pandas DataFrame as .name alters this reference frame
|
||||||
|
pd_series = self.pd_flights()['Carrier'].copy(deep=True)
|
||||||
|
ed_series = ed.Series(ELASTICSEARCH_HOST, FLIGHTS_INDEX_NAME, 'Carrier')
|
||||||
|
|
||||||
|
assert_pandas_eland_series_equal(pd_series, ed_series)
|
||||||
|
assert ed_series.name == pd_series.name
|
||||||
|
|
||||||
|
pd_series.name = "renamed1"
|
||||||
|
ed_series.name = "renamed1"
|
||||||
|
|
||||||
|
assert_pandas_eland_series_equal(pd_series, ed_series)
|
||||||
|
assert ed_series.name == pd_series.name
|
||||||
|
|
||||||
|
pd_series.name = "renamed2"
|
||||||
|
ed_series.name = "renamed2"
|
||||||
|
|
||||||
|
assert_pandas_eland_series_equal(pd_series, ed_series)
|
||||||
|
assert ed_series.name == pd_series.name
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user