mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Better Handling of Non Aggregatable Fields (#85)
* Updates the ecommerce mapping to include a non-aggregatable text field
* Updates existing tests and adds new tests for the non-aggregatable field
* Improves handling of non-aggregatable fields
* Fixes formatting
* Swaps the series in an assertion
* Adds a newline
This commit is contained in:
parent
99bfea42b6
commit
f263e21b8a
@ -373,7 +373,11 @@ class Mappings:
|
||||
aggregatable: bool
|
||||
Is the field aggregatable in Elasticsearch?
|
||||
"""
|
||||
return self._mappings_capabilities.loc[field_name]
|
||||
try:
|
||||
field_capabilities = self._mappings_capabilities.loc[field_name]
|
||||
except KeyError:
|
||||
field_capabilities = pd.Series()
|
||||
return field_capabilities
|
||||
|
||||
def get_date_field_format(self, field_name):
|
||||
"""
|
||||
@ -447,9 +451,7 @@ class Mappings:
|
||||
"""
|
||||
if field_names is None:
|
||||
field_names = self.source_fields()
|
||||
|
||||
aggregatables = {}
|
||||
|
||||
for field_name in field_names:
|
||||
capabilities = self.field_capabilities(field_name)
|
||||
if capabilities['aggregatable']:
|
||||
@ -458,11 +460,11 @@ class Mappings:
|
||||
# Try 'field_name.keyword'
|
||||
field_name_keyword = field_name + '.keyword'
|
||||
capabilities = self.field_capabilities(field_name_keyword)
|
||||
if capabilities['aggregatable']:
|
||||
if not capabilities.empty and capabilities.get('aggregatable'):
|
||||
aggregatables[field_name_keyword] = field_name
|
||||
else:
|
||||
# Aggregations not supported for this field
|
||||
raise ValueError("Aggregations not supported for ", field_name)
|
||||
|
||||
if not aggregatables:
|
||||
raise ValueError("Aggregations not supported for ", field_name)
|
||||
|
||||
return aggregatables
|
||||
|
||||
|
@ -249,7 +249,7 @@ class Operations:
|
||||
results = {}
|
||||
|
||||
for key, value in aggregatable_field_names.items():
|
||||
for bucket in response['aggregations'][field_names[0]]['buckets']:
|
||||
for bucket in response['aggregations'][key]['buckets']:
|
||||
results[bucket['key']] = bucket['doc_count']
|
||||
|
||||
try:
|
||||
|
@ -1051,7 +1051,10 @@ class Series(NDFrame):
|
||||
|
||||
else:
|
||||
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
|
||||
raise TypeError("Unsupported operation: '{}' {} '{}'".format(self._dtype, method_name, right._dtype))
|
||||
raise TypeError(
|
||||
"unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
|
||||
.format(method_name, type(self), self._dtype, type(right).__name__)
|
||||
)
|
||||
|
||||
# check left number and right numeric series
|
||||
elif np.issubdtype(np.dtype(type(right)), np.number) and np.issubdtype(self._dtype, np.number):
|
||||
@ -1085,7 +1088,8 @@ class Series(NDFrame):
|
||||
else:
|
||||
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
|
||||
raise TypeError(
|
||||
"unsupported operand type(s) for '{}' {} '{}'".format(type(self), method_name, type(right))
|
||||
"unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
|
||||
.format(method_name, type(self), self._dtype, type(right).__name__)
|
||||
)
|
||||
|
||||
def _numeric_rop(self, left, method_name, op_type=None):
|
||||
@ -1127,7 +1131,8 @@ class Series(NDFrame):
|
||||
else:
|
||||
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
|
||||
raise TypeError(
|
||||
"unsupported operand type(s) for '{}' {} '{}'".format(type(self), method_name, type(left))
|
||||
"unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
|
||||
.format(op_method_name, type(self), self._dtype, type(left).__name__)
|
||||
)
|
||||
|
||||
def max(self):
|
||||
|
@ -147,7 +147,7 @@ ECOMMERCE_MAPPING = {"mappings": {
|
||||
}
|
||||
},
|
||||
"customer_gender": {
|
||||
"type": "keyword"
|
||||
"type": "text"
|
||||
},
|
||||
"customer_id": {
|
||||
"type": "keyword"
|
||||
|
@ -22,7 +22,7 @@ class TestDataFrameNUnique(TestData):
|
||||
# assert_series_equal(pd_nunique, ed_nunique)
|
||||
|
||||
def test_ecommerce_nunique(self):
|
||||
columns = ['customer_first_name', 'customer_gender', 'day_of_week_i']
|
||||
columns = ['customer_first_name', 'customer_last_name', 'day_of_week_i']
|
||||
pd_ecommerce = self.pd_ecommerce()[columns]
|
||||
ed_ecommerce = self.ed_ecommerce()[columns]
|
||||
|
||||
|
@ -15,7 +15,6 @@ class TestMappingsAggregatables(TestData):
|
||||
'customer_birth_date': 'customer_birth_date',
|
||||
'customer_first_name.keyword': 'customer_first_name',
|
||||
'customer_full_name.keyword': 'customer_full_name',
|
||||
'customer_gender': 'customer_gender',
|
||||
'customer_id': 'customer_id',
|
||||
'customer_last_name.keyword': 'customer_last_name',
|
||||
'customer_phone': 'customer_phone',
|
||||
|
@ -38,3 +38,19 @@ class TestSeriesArithmetics(TestData):
|
||||
pdadd = "The last name is: " + self.pd_ecommerce()['customer_last_name']
|
||||
|
||||
assert_pandas_eland_series_equal(pdadd, edadd)
|
||||
|
||||
def test_non_aggregatable_add_str(self):
    """Adding a str to the 'customer_gender' series must raise ValueError."""
    ed_gender = self.ed_ecommerce()['customer_gender']

    # No `assert` around the expression: the expectation is the exception
    # itself, not the truthiness of whatever the addition might return.
    with pytest.raises(ValueError):
        ed_gender + "is the gender"
|
||||
|
||||
def teststr_add_non_aggregatable(self):
    """Adding the 'customer_gender' series to a str must raise ValueError."""
    ed_gender = self.ed_ecommerce()['customer_gender']

    # No `assert` around the expression: the expectation is the exception
    # itself, not the truthiness of whatever the addition might return.
    with pytest.raises(ValueError):
        "The gender is: " + ed_gender
|
||||
|
||||
def test_non_aggregatable_add_aggregatable(self):
    """'customer_gender' + 'customer_first_name' must raise ValueError."""
    ed_gender = self.ed_ecommerce()['customer_gender']
    ed_first_name = self.ed_ecommerce()['customer_first_name']

    # No `assert` around the expression: the expectation is the exception
    # itself, not the truthiness of whatever the addition might return.
    with pytest.raises(ValueError):
        ed_gender + ed_first_name
|
||||
|
||||
def test_aggregatable_add_non_aggregatable(self):
    """'customer_first_name' + 'customer_gender' must raise ValueError."""
    ed_first_name = self.ed_ecommerce()['customer_first_name']
    ed_gender = self.ed_ecommerce()['customer_gender']

    # No `assert` around the expression: the expectation is the exception
    # itself, not the truthiness of whatever the addition might return.
    with pytest.raises(ValueError):
        ed_first_name + ed_gender
|
||||
|
@ -45,3 +45,8 @@ class TestSeriesValueCounts(TestData):
|
||||
ed_s = self.ed_flights()['Carrier']
|
||||
with pytest.raises(ValueError):
|
||||
assert ed_s.value_counts(es_size=-9)
|
||||
|
||||
def test_value_counts_non_aggregatable(self):
    """value_counts() on the 'customer_gender' series must raise ValueError."""
    ed_s = self.ed_ecommerce()['customer_gender']

    # Call without `assert`: if value_counts() ever returned a pandas
    # Series instead of raising, truth-testing that result would itself
    # raise ValueError ("truth value of a Series is ambiguous") and this
    # test would keep passing for the wrong reason.
    # NOTE(review): the return type of value_counts() is not visible
    # here - confirm against the Series implementation.
    with pytest.raises(ValueError):
        ed_s.value_counts()
|
||||
|
Loading…
x
Reference in New Issue
Block a user