mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Better Handling of Non Aggregatable Fields (#85)
* updates ecommerce mapping to include non-aggregatable text field
* updates existing tests and adds new tests for non-aggregatable field
* better handling of non-aggregatable fields
* fixes formatting
* swaps series in assertion
* adds newline
This commit is contained in:
parent
99bfea42b6
commit
f263e21b8a
@ -373,7 +373,11 @@ class Mappings:
|
|||||||
aggregatable: bool
|
aggregatable: bool
|
||||||
Is the field aggregatable in Elasticsearch?
|
Is the field aggregatable in Elasticsearch?
|
||||||
"""
|
"""
|
||||||
return self._mappings_capabilities.loc[field_name]
|
try:
|
||||||
|
field_capabilities = self._mappings_capabilities.loc[field_name]
|
||||||
|
except KeyError:
|
||||||
|
field_capabilities = pd.Series()
|
||||||
|
return field_capabilities
|
||||||
|
|
||||||
def get_date_field_format(self, field_name):
|
def get_date_field_format(self, field_name):
|
||||||
"""
|
"""
|
||||||
@ -447,9 +451,7 @@ class Mappings:
|
|||||||
"""
|
"""
|
||||||
if field_names is None:
|
if field_names is None:
|
||||||
field_names = self.source_fields()
|
field_names = self.source_fields()
|
||||||
|
|
||||||
aggregatables = {}
|
aggregatables = {}
|
||||||
|
|
||||||
for field_name in field_names:
|
for field_name in field_names:
|
||||||
capabilities = self.field_capabilities(field_name)
|
capabilities = self.field_capabilities(field_name)
|
||||||
if capabilities['aggregatable']:
|
if capabilities['aggregatable']:
|
||||||
@ -458,11 +460,11 @@ class Mappings:
|
|||||||
# Try 'field_name.keyword'
|
# Try 'field_name.keyword'
|
||||||
field_name_keyword = field_name + '.keyword'
|
field_name_keyword = field_name + '.keyword'
|
||||||
capabilities = self.field_capabilities(field_name_keyword)
|
capabilities = self.field_capabilities(field_name_keyword)
|
||||||
if capabilities['aggregatable']:
|
if not capabilities.empty and capabilities.get('aggregatable'):
|
||||||
aggregatables[field_name_keyword] = field_name
|
aggregatables[field_name_keyword] = field_name
|
||||||
else:
|
|
||||||
# Aggregations not supported for this field
|
if not aggregatables:
|
||||||
raise ValueError("Aggregations not supported for ", field_name)
|
raise ValueError("Aggregations not supported for ", field_name)
|
||||||
|
|
||||||
return aggregatables
|
return aggregatables
|
||||||
|
|
||||||
|
@ -249,7 +249,7 @@ class Operations:
|
|||||||
results = {}
|
results = {}
|
||||||
|
|
||||||
for key, value in aggregatable_field_names.items():
|
for key, value in aggregatable_field_names.items():
|
||||||
for bucket in response['aggregations'][field_names[0]]['buckets']:
|
for bucket in response['aggregations'][key]['buckets']:
|
||||||
results[bucket['key']] = bucket['doc_count']
|
results[bucket['key']] = bucket['doc_count']
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -1051,7 +1051,10 @@ class Series(NDFrame):
|
|||||||
|
|
||||||
else:
|
else:
|
||||||
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
|
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
|
||||||
raise TypeError("Unsupported operation: '{}' {} '{}'".format(self._dtype, method_name, right._dtype))
|
raise TypeError(
|
||||||
|
"unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
|
||||||
|
.format(method_name, type(self), self._dtype, type(right).__name__)
|
||||||
|
)
|
||||||
|
|
||||||
# check left number and right numeric series
|
# check left number and right numeric series
|
||||||
elif np.issubdtype(np.dtype(type(right)), np.number) and np.issubdtype(self._dtype, np.number):
|
elif np.issubdtype(np.dtype(type(right)), np.number) and np.issubdtype(self._dtype, np.number):
|
||||||
@ -1085,7 +1088,8 @@ class Series(NDFrame):
|
|||||||
else:
|
else:
|
||||||
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
|
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
"unsupported operand type(s) for '{}' {} '{}'".format(type(self), method_name, type(right))
|
"unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
|
||||||
|
.format(method_name, type(self), self._dtype, type(right).__name__)
|
||||||
)
|
)
|
||||||
|
|
||||||
def _numeric_rop(self, left, method_name, op_type=None):
|
def _numeric_rop(self, left, method_name, op_type=None):
|
||||||
@ -1127,7 +1131,8 @@ class Series(NDFrame):
|
|||||||
else:
|
else:
|
||||||
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
|
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
"unsupported operand type(s) for '{}' {} '{}'".format(type(self), method_name, type(left))
|
"unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
|
||||||
|
.format(op_method_name, type(self), self._dtype, type(left).__name__)
|
||||||
)
|
)
|
||||||
|
|
||||||
def max(self):
|
def max(self):
|
||||||
|
@ -147,7 +147,7 @@ ECOMMERCE_MAPPING = {"mappings": {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"customer_gender": {
|
"customer_gender": {
|
||||||
"type": "keyword"
|
"type": "text"
|
||||||
},
|
},
|
||||||
"customer_id": {
|
"customer_id": {
|
||||||
"type": "keyword"
|
"type": "keyword"
|
||||||
|
@ -22,7 +22,7 @@ class TestDataFrameNUnique(TestData):
|
|||||||
# assert_series_equal(pd_nunique, ed_nunique)
|
# assert_series_equal(pd_nunique, ed_nunique)
|
||||||
|
|
||||||
def test_ecommerce_nunique(self):
|
def test_ecommerce_nunique(self):
|
||||||
columns = ['customer_first_name', 'customer_gender', 'day_of_week_i']
|
columns = ['customer_first_name', 'customer_last_name', 'day_of_week_i']
|
||||||
pd_ecommerce = self.pd_ecommerce()[columns]
|
pd_ecommerce = self.pd_ecommerce()[columns]
|
||||||
ed_ecommerce = self.ed_ecommerce()[columns]
|
ed_ecommerce = self.ed_ecommerce()[columns]
|
||||||
|
|
||||||
|
@ -15,7 +15,6 @@ class TestMappingsAggregatables(TestData):
|
|||||||
'customer_birth_date': 'customer_birth_date',
|
'customer_birth_date': 'customer_birth_date',
|
||||||
'customer_first_name.keyword': 'customer_first_name',
|
'customer_first_name.keyword': 'customer_first_name',
|
||||||
'customer_full_name.keyword': 'customer_full_name',
|
'customer_full_name.keyword': 'customer_full_name',
|
||||||
'customer_gender': 'customer_gender',
|
|
||||||
'customer_id': 'customer_id',
|
'customer_id': 'customer_id',
|
||||||
'customer_last_name.keyword': 'customer_last_name',
|
'customer_last_name.keyword': 'customer_last_name',
|
||||||
'customer_phone': 'customer_phone',
|
'customer_phone': 'customer_phone',
|
||||||
|
@ -38,3 +38,19 @@ class TestSeriesArithmetics(TestData):
|
|||||||
pdadd = "The last name is: " + self.pd_ecommerce()['customer_last_name']
|
pdadd = "The last name is: " + self.pd_ecommerce()['customer_last_name']
|
||||||
|
|
||||||
assert_pandas_eland_series_equal(pdadd, edadd)
|
assert_pandas_eland_series_equal(pdadd, edadd)
|
||||||
|
|
||||||
|
def test_non_aggregatable_add_str(self):
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
assert self.ed_ecommerce()['customer_gender'] + "is the gender"
|
||||||
|
|
||||||
|
def teststr_add_non_aggregatable(self):
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
assert "The gender is: " + self.ed_ecommerce()['customer_gender']
|
||||||
|
|
||||||
|
def test_non_aggregatable_add_aggregatable(self):
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
assert self.ed_ecommerce()['customer_gender'] + self.ed_ecommerce()['customer_first_name']
|
||||||
|
|
||||||
|
def test_aggregatable_add_non_aggregatable(self):
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
assert self.ed_ecommerce()['customer_first_name'] + self.ed_ecommerce()['customer_gender']
|
||||||
|
@ -45,3 +45,8 @@ class TestSeriesValueCounts(TestData):
|
|||||||
ed_s = self.ed_flights()['Carrier']
|
ed_s = self.ed_flights()['Carrier']
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
assert ed_s.value_counts(es_size=-9)
|
assert ed_s.value_counts(es_size=-9)
|
||||||
|
|
||||||
|
def test_value_counts_non_aggregatable(self):
|
||||||
|
ed_s = self.ed_ecommerce()['customer_gender']
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
assert ed_s.value_counts()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user