eland/eland/tests/dataframe/test_nunique_pytest.py
Michael Hirsch f263e21b8a Better Handling of Non Aggregatable Fields (#85)
* updates ecommerce mapping to include non-aggregatable text field

* updates exists tests and adds new tests for non-aggregatable field

* better handling on non-aggregatable fields

* fixes formatting

* swaps series in assertion

* adds newline
2019-12-06 08:20:09 +00:00

33 lines
1.1 KiB
Python

# File called _pytest for PyCharm compatibility
from pandas.util.testing import assert_series_equal
from eland.tests.common import TestData
class TestDataFrameNUnique(TestData):
    """Check that ``nunique()`` on eland DataFrames agrees with pandas."""

    def test_flights_nunique(self):
        """Compare distinct counts on a subset of the flights columns.

        The Elasticsearch-backed counts are approximate, so the two results
        are compared with a relative tolerance instead of exact equality.
        """
        # Note pandas.nunique fails for dict columns (e.g. DestLocation)
        columns = ['AvgTicketPrice', 'Cancelled', 'Carrier', 'Dest', 'DestAirportID', 'DestCityName']

        pd_flights = self.pd_flights()[columns]
        ed_flights = self.ed_flights()[columns]

        pd_nunique = pd_flights.nunique()
        ed_nunique = ed_flights.nunique()

        # ES is approximate counts so an exact assert_series_equal fails, e.g.
        # E[left]:  [13059, 2, 4, 156, 156, 143]
        # E[right]: [13132, 2, 4, 156, 156, 143]
        # Compare per-column relative error instead. The subtraction aligns on
        # column labels, so a column missing from either result becomes NaN
        # and fails the comparison below.
        # NOTE(review): 5% is a chosen bound (the discrepancy recorded above is
        # under 1%) - tighten or loosen to match the cluster's precision settings.
        relative_tolerance = 0.05
        relative_error = (ed_nunique - pd_nunique).abs() / pd_nunique

        assert (relative_error <= relative_tolerance).all(), relative_error

    def test_ecommerce_nunique(self):
        """Compare distinct counts on ecommerce columns, expecting an exact match."""
        columns = ['customer_first_name', 'customer_last_name', 'day_of_week_i']

        pd_ecommerce = self.pd_ecommerce()[columns]
        ed_ecommerce = self.ed_ecommerce()[columns]

        pd_nunique = pd_ecommerce.nunique()
        ed_nunique = ed_ecommerce.nunique()

        assert_series_equal(pd_nunique, ed_nunique)