mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
* updates ecommerce mapping to include non-aggregatable text field * updates exists tests and adds new tests for non-aggregatable field * better handling on non-aggregatable fields * fixes formatting * swaps series in assertion * adds newline
33 lines
1.1 KiB
Python
33 lines
1.1 KiB
Python
# File called _pytest for PyCharm compatibility
|
|
|
|
from pandas.util.testing import assert_series_equal
|
|
|
|
from eland.tests.common import TestData
|
|
|
|
|
|
class TestDataFrameNUnique(TestData):
    """Compare ``nunique()`` results from pandas and eland DataFrames."""

    def test_flights_nunique(self):
        """Check per-column distinct counts on the flights data.

        The eland counts are approximate (see the recorded mismatch below),
        so values are compared within a relative tolerance rather than
        exactly. Index labels are still compared exactly.
        """
        # Note pandas.nunique fails for dict columns (e.g. DestLocation)
        columns = ['AvgTicketPrice', 'Cancelled', 'Carrier', 'Dest', 'DestAirportID', 'DestCityName']
        pd_flights = self.pd_flights()[columns]
        ed_flights = self.ed_flights()[columns]

        pd_nunique = pd_flights.nunique()
        ed_nunique = ed_flights.nunique()

        # ES is approximate counts so an exact assert_series_equal fails:
        # E[left]:  [13059, 2, 4, 156, 156, 143]
        # E[right]: [13132, 2, 4, 156, 156, 143]
        # NOTE(review): 5% is a chosen bound (the recorded drift above is
        # roughly 0.6%); tighten or loosen it to match the cluster's
        # cardinality precision settings.
        rel_tolerance = 0.05

        # Same columns, in the same order, must be reported by both sides.
        assert list(ed_nunique.index) == list(pd_nunique.index)

        for column in columns:
            expected = pd_nunique[column]
            actual = ed_nunique[column]
            # Small counts get a tolerance below 1, i.e. they must match exactly.
            assert abs(actual - expected) <= rel_tolerance * expected, (
                "nunique mismatch for {!r}: pandas={} eland={}".format(
                    column, expected, actual
                )
            )

    def test_ecommerce_nunique(self):
        """Check per-column distinct counts on the ecommerce data match exactly."""
        columns = ['customer_first_name', 'customer_last_name', 'day_of_week_i']
        pd_ecommerce = self.pd_ecommerce()[columns]
        ed_ecommerce = self.ed_ecommerce()[columns]

        pd_nunique = pd_ecommerce.nunique()
        ed_nunique = ed_ecommerce.nunique()

        assert_series_equal(pd_nunique, ed_nunique)
|