Better Handling of Non Aggregatable Fields (#85)

* updates ecommerce mapping to include non-aggregatable text field

* updates exists tests and adds new tests for non-aggregatable field

* better handling on non-aggregatable fields

* fixes formatting

* swaps series in assertion

* adds newline
This commit is contained in:
Michael Hirsch 2019-12-06 03:20:09 -05:00 committed by stevedodson
parent 99bfea42b6
commit f263e21b8a
8 changed files with 41 additions and 14 deletions

View File

@ -373,7 +373,11 @@ class Mappings:
aggregatable: bool
Is the field aggregatable in Elasticsearch?
"""
return self._mappings_capabilities.loc[field_name]
try:
field_capabilities = self._mappings_capabilities.loc[field_name]
except KeyError:
field_capabilities = pd.Series()
return field_capabilities
def get_date_field_format(self, field_name):
"""
@ -447,9 +451,7 @@ class Mappings:
"""
if field_names is None:
field_names = self.source_fields()
aggregatables = {}
for field_name in field_names:
capabilities = self.field_capabilities(field_name)
if capabilities['aggregatable']:
@ -458,11 +460,11 @@ class Mappings:
# Try 'field_name.keyword'
field_name_keyword = field_name + '.keyword'
capabilities = self.field_capabilities(field_name_keyword)
if capabilities['aggregatable']:
if not capabilities.empty and capabilities.get('aggregatable'):
aggregatables[field_name_keyword] = field_name
else:
# Aggregations not supported for this field
raise ValueError("Aggregations not supported for ", field_name)
if not aggregatables:
raise ValueError("Aggregations not supported for ", field_name)
return aggregatables

View File

@ -249,7 +249,7 @@ class Operations:
results = {}
for key, value in aggregatable_field_names.items():
for bucket in response['aggregations'][field_names[0]]['buckets']:
for bucket in response['aggregations'][key]['buckets']:
results[bucket['key']] = bucket['doc_count']
try:

View File

@ -1051,7 +1051,10 @@ class Series(NDFrame):
else:
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
raise TypeError("Unsupported operation: '{}' {} '{}'".format(self._dtype, method_name, right._dtype))
raise TypeError(
"unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
.format(method_name, type(self), self._dtype, type(right).__name__)
)
# check left number and right numeric series
elif np.issubdtype(np.dtype(type(right)), np.number) and np.issubdtype(self._dtype, np.number):
@ -1085,7 +1088,8 @@ class Series(NDFrame):
else:
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
raise TypeError(
"unsupported operand type(s) for '{}' {} '{}'".format(type(self), method_name, type(right))
"unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
.format(method_name, type(self), self._dtype, type(right).__name__)
)
def _numeric_rop(self, left, method_name, op_type=None):
@ -1127,7 +1131,8 @@ class Series(NDFrame):
else:
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
raise TypeError(
"unsupported operand type(s) for '{}' {} '{}'".format(type(self), method_name, type(left))
"unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
.format(op_method_name, type(self), self._dtype, type(left).__name__)
)
def max(self):

View File

@ -147,7 +147,7 @@ ECOMMERCE_MAPPING = {"mappings": {
}
},
"customer_gender": {
"type": "keyword"
"type": "text"
},
"customer_id": {
"type": "keyword"

View File

@ -22,7 +22,7 @@ class TestDataFrameNUnique(TestData):
# assert_series_equal(pd_nunique, ed_nunique)
def test_ecommerce_nunique(self):
columns = ['customer_first_name', 'customer_gender', 'day_of_week_i']
columns = ['customer_first_name', 'customer_last_name', 'day_of_week_i']
pd_ecommerce = self.pd_ecommerce()[columns]
ed_ecommerce = self.ed_ecommerce()[columns]

View File

@ -15,7 +15,6 @@ class TestMappingsAggregatables(TestData):
'customer_birth_date': 'customer_birth_date',
'customer_first_name.keyword': 'customer_first_name',
'customer_full_name.keyword': 'customer_full_name',
'customer_gender': 'customer_gender',
'customer_id': 'customer_id',
'customer_last_name.keyword': 'customer_last_name',
'customer_phone': 'customer_phone',

View File

@ -38,3 +38,19 @@ class TestSeriesArithmetics(TestData):
pdadd = "The last name is: " + self.pd_ecommerce()['customer_last_name']
assert_pandas_eland_series_equal(pdadd, edadd)
def test_non_aggregatable_add_str(self):
with pytest.raises(ValueError):
assert self.ed_ecommerce()['customer_gender'] + "is the gender"
def teststr_add_non_aggregatable(self):
with pytest.raises(ValueError):
assert "The gender is: " + self.ed_ecommerce()['customer_gender']
def test_non_aggregatable_add_aggregatable(self):
with pytest.raises(ValueError):
assert self.ed_ecommerce()['customer_gender'] + self.ed_ecommerce()['customer_first_name']
def test_aggregatable_add_non_aggregatable(self):
with pytest.raises(ValueError):
assert self.ed_ecommerce()['customer_first_name'] + self.ed_ecommerce()['customer_gender']

View File

@ -45,3 +45,8 @@ class TestSeriesValueCounts(TestData):
ed_s = self.ed_flights()['Carrier']
with pytest.raises(ValueError):
assert ed_s.value_counts(es_size=-9)
def test_value_counts_non_aggregatable(self):
ed_s = self.ed_ecommerce()['customer_gender']
with pytest.raises(ValueError):
assert ed_s.value_counts()