mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
94 lines
3.3 KiB
Python
94 lines
3.3 KiB
Python
import os
|
|
|
|
import pandas as pd
|
|
from pandas.util.testing import (assert_frame_equal, assert_series_equal)
|
|
|
|
import eland as ed
|
|
|
|
# Absolute path of the directory that contains this module.
ROOT_DIR = os.path.split(os.path.abspath(__file__))[0]
|
|
|
|
# Create pandas and eland data frames
|
|
from eland.tests import ES_TEST_CLIENT
|
|
from eland.tests import FLIGHTS_DF_FILE_NAME, FLIGHTS_INDEX_NAME, \
|
|
FLIGHTS_SMALL_INDEX_NAME, \
|
|
ECOMMERCE_DF_FILE_NAME, ECOMMERCE_INDEX_NAME
|
|
|
|
# Reference pandas frame for the flights data: read from the JSON file, then
# ordered by index.
_pd_flights = pd.read_json(FLIGHTS_DF_FILE_NAME)
_pd_flights = _pd_flights.sort_index()
_pd_flights['timestamp'] = pd.to_datetime(_pd_flights['timestamp'])
# Map the index labels to str so the index dtype is 'object' rather than int.
_pd_flights.index = _pd_flights.index.map(str)

# Eland frame for the flights index, read through the shared test client.
_ed_flights = ed.read_es(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME)
|
|
|
|
# Eland frame for the small flights index, read through the shared test client.
_ed_flights_small = ed.read_es(ES_TEST_CLIENT, FLIGHTS_SMALL_INDEX_NAME)

# First 48 rows of the pandas flights frame.
# NOTE(review): presumably 48 matches the size of the small index - confirm.
_pd_flights_small = _pd_flights.iloc[:48]
|
|
|
|
# Reference pandas frame for the ecommerce data: read from the JSON file and
# ordered by index.
_pd_ecommerce = pd.read_json(ECOMMERCE_DF_FILE_NAME).sort_index()
_pd_ecommerce['order_date'] = pd.to_datetime(_pd_ecommerce['order_date'])
# 'products.created_on' is converted value-by-value via apply() rather than on
# the column as a whole.
_pd_ecommerce['products.created_on'] = \
    _pd_ecommerce['products.created_on'].apply(pd.to_datetime)
# Insert an all-None 'customer_birth_date' column at position 2.
_pd_ecommerce.insert(2, 'customer_birth_date', None)
_pd_ecommerce.index = _pd_ecommerce.index.map(str)  # make index 'object' not int
# Series.astype() returns a new Series instead of converting in place, so the
# result has to be assigned back for the column dtype to actually change.
# The explicit '[ns]' unit is used because unit-less 'datetime64' is rejected
# by recent pandas releases.
_pd_ecommerce['customer_birth_date'] = \
    _pd_ecommerce['customer_birth_date'].astype('datetime64[ns]')

# Eland frame for the ecommerce index, read through the shared test client.
_ed_ecommerce = ed.read_es(ES_TEST_CLIENT, ECOMMERCE_INDEX_NAME)
|
|
|
|
|
|
class TestData:
    """Accessors for the module-level pandas and eland data frames.

    Every method returns the frame object created when this module was
    imported; no copy is made.
    """

    def pd_flights(self):
        """Return the pandas flights frame."""
        return _pd_flights

    def ed_flights(self):
        """Return the eland flights frame."""
        return _ed_flights

    def pd_flights_small(self):
        """Return the pandas frame for the small flights data set."""
        return _pd_flights_small

    def ed_flights_small(self):
        """Return the eland frame for the small flights index."""
        return _ed_flights_small

    def pd_ecommerce(self):
        """Return the pandas ecommerce frame."""
        return _pd_ecommerce

    def ed_ecommerce(self):
        """Return the eland ecommerce frame."""
        return _ed_ecommerce
|
|
|
|
|
|
def assert_pandas_eland_frame_equal(left, right):
    """Assert that a pandas DataFrame and an eland DataFrame are equal.

    Parameters
    ----------
    left : pd.DataFrame
        The pandas frame.
    right : ed.DataFrame
        The eland frame; compared through the result of ``_to_pandas()``.

    Raises
    ------
    AssertionError
        If either argument has the wrong type, or if pandas'
        ``assert_frame_equal`` rejects the comparison.
    """
    template = "Expected type {exp_type}, found {act_type} instead"

    left_ok = isinstance(left, pd.DataFrame)
    if not left_ok:
        message = template.format(exp_type='pd.DataFrame', act_type=type(left))
        raise AssertionError(message)

    right_ok = isinstance(right, ed.DataFrame)
    if not right_ok:
        message = template.format(exp_type='ed.DataFrame', act_type=type(right))
        raise AssertionError(message)

    # Delegate the comparison itself to the pandas testing helper.
    right_as_pandas = right._to_pandas()
    assert_frame_equal(left, right_as_pandas)
|
|
|
|
|
|
def assert_eland_frame_equal(left, right):
    """Assert that two eland DataFrames are equal.

    Both frames are compared through the result of ``_to_pandas()``.

    Parameters
    ----------
    left : ed.DataFrame
    right : ed.DataFrame

    Raises
    ------
    AssertionError
        If either argument is not an ``ed.DataFrame``, or if pandas'
        ``assert_frame_equal`` rejects the comparison.
    """
    template = "Expected type {exp_type}, found {act_type} instead"

    left_ok = isinstance(left, ed.DataFrame)
    if not left_ok:
        message = template.format(exp_type='ed.DataFrame', act_type=type(left))
        raise AssertionError(message)

    right_ok = isinstance(right, ed.DataFrame)
    if not right_ok:
        message = template.format(exp_type='ed.DataFrame', act_type=type(right))
        raise AssertionError(message)

    # Delegate the comparison itself to the pandas testing helper.
    left_as_pandas = left._to_pandas()
    right_as_pandas = right._to_pandas()
    assert_frame_equal(left_as_pandas, right_as_pandas)
|
|
|
|
|
|
def assert_pandas_eland_series_equal(left, right, check_less_precise=False):
    """Assert that a pandas Series and an eland Series are equal.

    Parameters
    ----------
    left : pd.Series
        The pandas series.
    right : ed.Series
        The eland series; compared through the result of ``_to_pandas()``.
    check_less_precise : bool, default False
        Passed unchanged to pandas' ``assert_series_equal``.

    Raises
    ------
    AssertionError
        If either argument has the wrong type, or if pandas'
        ``assert_series_equal`` rejects the comparison.
    """
    template = "Expected type {exp_type}, found {act_type} instead"

    left_ok = isinstance(left, pd.Series)
    if not left_ok:
        message = template.format(exp_type='pd.Series', act_type=type(left))
        raise AssertionError(message)

    right_ok = isinstance(right, ed.Series)
    if not right_ok:
        message = template.format(exp_type='ed.Series', act_type=type(right))
        raise AssertionError(message)

    # Delegate the comparison itself to the pandas testing helper.
    right_as_pandas = right._to_pandas()
    assert_series_equal(left, right_as_pandas, check_less_precise=check_less_precise)
|