Extending metrics tests based on review comments in https://github.com/elastic/eland/pull/51

This commit is contained in:
Stephen Dodson 2019-11-19 13:49:34 +00:00
parent 2854eecbf4
commit 8f31c04c07
2 changed files with 19 additions and 41 deletions

View File

@ -151,8 +151,6 @@ class Operations:
for field in numeric_source_fields: for field in numeric_source_fields:
body.metric_aggs(field, func, field) body.metric_aggs(field, func, field)
#print(body.to_search_body(), columns, numeric_source_fields)
response = query_compiler._client.search( response = query_compiler._client.search(
index=query_compiler._index_pattern, index=query_compiler._index_pattern,
size=0, size=0,

View File

@ -9,52 +9,30 @@ import eland as ed
class TestDataFrameMetrics(TestData): class TestDataFrameMetrics(TestData):
def test_mean(self): funcs = ['max', 'min', 'mean', 'sum']
def test_flights_metrics(self):
pd_flights = self.pd_flights() pd_flights = self.pd_flights()
ed_flights = self.ed_flights() ed_flights = self.ed_flights()
pd_mean = pd_flights.mean(numeric_only=True) for func in self.funcs:
ed_mean = ed_flights.mean(numeric_only=True) pd_metric = getattr(pd_flights, func)(numeric_only=True)
ed_metric = getattr(ed_flights, func)(numeric_only=True)
assert_series_equal(pd_mean, ed_mean) assert_series_equal(pd_metric, ed_metric)
def test_sum(self): def test_ecommerce_selected_non_numeric_source_fields(self):
pd_flights = self.pd_flights()
ed_flights = self.ed_flights()
pd_sum = pd_flights.sum(numeric_only=True)
ed_sum = ed_flights.sum(numeric_only=True)
assert_series_equal(pd_sum, ed_sum)
def test_min(self):
pd_flights = self.pd_flights()
ed_flights = self.ed_flights()
pd_min = pd_flights.min(numeric_only=True)
ed_min = ed_flights.min(numeric_only=True)
assert_series_equal(pd_min, ed_min)
def test_max(self):
pd_flights = self.pd_flights()
ed_flights = self.ed_flights()
pd_max = pd_flights.max(numeric_only=True)
ed_max = ed_flights.max(numeric_only=True)
assert_series_equal(pd_max, ed_max)
def test_ecommerce_selected_non_numeric_source_fields_max(self):
# None of these are numeric # None of these are numeric
columns = ['category', 'currency', 'customer_birth_date', 'customer_first_name', 'user'] columns = ['category', 'currency', 'customer_birth_date', 'customer_first_name', 'user']
pd_ecommerce = self.pd_ecommerce()[columns] pd_ecommerce = self.pd_ecommerce()[columns]
ed_ecommerce = self.ed_ecommerce()[columns] ed_ecommerce = self.ed_ecommerce()[columns]
assert_series_equal(pd_ecommerce.max(numeric_only=True), ed_ecommerce.max(numeric_only=True)) for func in self.funcs:
assert_series_equal(getattr(pd_ecommerce, func)(numeric_only=True), getattr(ed_ecommerce, func)(numeric_only=True),
check_less_precise=True)
def test_ecommerce_selected_mixed_numeric_source_fields_max(self): def test_ecommerce_selected_mixed_numeric_source_fields(self):
# Some of these are numeric # Some of these are numeric
columns = ['category', 'currency', 'taxless_total_price', 'customer_birth_date', columns = ['category', 'currency', 'taxless_total_price', 'customer_birth_date',
'total_quantity', 'customer_first_name', 'user'] 'total_quantity', 'customer_first_name', 'user']
@ -62,16 +40,18 @@ class TestDataFrameMetrics(TestData):
pd_ecommerce = self.pd_ecommerce()[columns] pd_ecommerce = self.pd_ecommerce()[columns]
ed_ecommerce = self.ed_ecommerce()[columns] ed_ecommerce = self.ed_ecommerce()[columns]
assert_series_equal(pd_ecommerce.max(numeric_only=True), ed_ecommerce.max(numeric_only=True), for func in self.funcs:
assert_series_equal(getattr(pd_ecommerce, func)(numeric_only=True), getattr(ed_ecommerce, func)(numeric_only=True),
check_less_precise=True) check_less_precise=True)
def test_ecommerce_selected_all_numeric_source_fields_max(self): def test_ecommerce_selected_all_numeric_source_fields(self):
# All of these are numeric # All of these are numeric
columns = ['total_quantity', 'taxful_total_price', 'taxless_total_price'] columns = ['total_quantity', 'taxful_total_price', 'taxless_total_price']
pd_ecommerce = self.pd_ecommerce()[columns] pd_ecommerce = self.pd_ecommerce()[columns]
ed_ecommerce = self.ed_ecommerce()[columns] ed_ecommerce = self.ed_ecommerce()[columns]
assert_series_equal(pd_ecommerce.max(numeric_only=True), ed_ecommerce.max(numeric_only=True), for func in self.funcs:
assert_series_equal(getattr(pd_ecommerce, func)(numeric_only=True), getattr(ed_ecommerce, func)(numeric_only=True),
check_less_precise=True) check_less_precise=True)