mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
* Fix test setup to match pandas 2.0 demands * Use the now deprecated _append method (Better solution might exist) * Deal with numeric_only being removed in metrics test * Skip mad metric for other pandas versions * Account for differences between pandas versions in describe methods * Run black * Check Pandas version first * Mirror behaviour of installed Pandas version when running value_counts * Allow passing arguments to the individual asserters * Fix for method _construct_axes_from_arguments no longer existing * Skip mad metric if it does not exist * Account for pandas 2.0 timestamp default behaviour * Deal with empty vs other inferred data types * Account for default datetime precision change * Run Black * Solution for differences in inferred_type only * Fix csv and json issues * Skip two doctests * Passing a set as indexer is no longer allowed * Don't validate output where it differs between Pandas versions in the environment * Update test matrix and packaging metadata * Update version of Python in the docs * Update Python version in demo notebook * Match noxfile * Symmetry * Fix trailing comma in JSON * Revert some changes in setup.py to fix building the documentation * Revert "Revert some changes in setup.py to fix building the documentation" This reverts commit ea9879753129d8d8390b3cbbce57155a8b4fb346. * Use PANDAS_VERSION from eland.common * Still skip the doctest, but make the output pandas 2 instead of 1 * Still skip doctest, but switch to pandas 2 output * Prepare for pandas 3 * Reference the right column * Ignore output in tests but switch to pandas 2 output * Add line comment about NBVAL_IGNORE_OUTPUT * Restore missing line and add stderr cell * Use non-private method instead * Fix indentation and parameter issues * If index is not specified, and pandas 1 is present, set it to True From pandas 2 and upwards, index is set to None by default * Run black * Newer version of black might have different opinions? 
* Add line comment * Remove unused import * Add reason for ignore statement * Add reason for skip --------- Co-authored-by: Quentin Pradet <quentin.pradet@elastic.co>
167 lines
5.3 KiB
Python
167 lines
5.3 KiB
Python
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
|
|
|
|
import inspect
|
|
|
|
import pandas as pd
|
|
import pytest
|
|
|
|
import eland as ed
|
|
|
|
from .common import (
|
|
TestData,
|
|
_ed_ecommerce,
|
|
_ed_flights,
|
|
_ed_flights_small,
|
|
_pd_ecommerce,
|
|
_pd_flights,
|
|
_pd_flights_small,
|
|
assert_frame_equal,
|
|
assert_pandas_eland_frame_equal,
|
|
assert_pandas_eland_series_equal,
|
|
assert_series_equal,
|
|
)
|
|
|
|
|
|
class SymmetricAPIChecker:
    """Pair an eland object with a pandas object and compare them on every use.

    Attribute access, item access and method calls made on the checker are
    applied to both ``self.ed`` and ``self.pd``. Both sides must either
    succeed or raise, and successful results are compared with
    ``check_values``. Results that are eland DataFrames/Series are wrapped in
    a new ``SymmetricAPIChecker`` so calls can be chained; anything else is
    returned as the pandas-side value.
    """

    def __init__(self, ed_obj, pd_obj):
        """Store the eland object (``ed_obj``) and its pandas counterpart (``pd_obj``)."""
        self.ed = ed_obj
        self.pd = pd_obj

    def load_dataset(self, dataset):
        """Point the checker at one of the named module-level datasets.

        ``dataset`` must be ``"flights"``, ``"flights_small"`` or
        ``"ecommerce"``. The pandas side is copied so the shared frame is not
        handed out directly.

        Raises:
            ValueError: if ``dataset`` is not one of the known names.
        """
        if dataset == "flights":
            self.ed = _ed_flights
            self.pd = _pd_flights.copy()
        elif dataset == "flights_small":
            self.ed = _ed_flights_small
            self.pd = _pd_flights_small.copy()
        elif dataset == "ecommerce":
            self.ed = _ed_ecommerce
            self.pd = _pd_ecommerce.copy()
        else:
            raise ValueError(f"Unknown dataset {dataset!r}")

    def return_value_checker(self, func_name):
        """Returns a function which wraps the requested function
        and checks the return value when that function is inevitably
        called.
        """

        def f(*args, **kwargs):
            # Call the eland side first, capturing rather than propagating
            # any exception so both sides can be compared afterwards.
            ed_exc = None
            try:
                ed_obj = getattr(self.ed, func_name)(*args, **kwargs)
            except Exception as e:
                ed_exc = e

            pd_exc = None
            try:
                if func_name == "to_pandas":
                    # pandas objects have no to_pandas(); the pandas object
                    # itself is the expected result.
                    pd_obj = self.pd
                else:
                    pd_obj = getattr(self.pd, func_name)(*args, **kwargs)
            except Exception as e:
                pd_exc = e

            # Raises if either side failed, so past this point both
            # ed_obj and pd_obj are bound.
            self.check_exception(ed_exc, pd_exc)
            try:
                self.check_values(ed_obj, pd_obj)
            except AssertionError as e:
                # This is an attribute we allow to differ when comparing zero-length objects
                if (
                    'Attribute "inferred_type" are different' in repr(e)
                    and len(ed_obj) == 0
                    and len(pd_obj) == 0
                ):
                    self.check_values(ed_obj, pd_obj, check_index_type=False)
                else:
                    # Any other mismatch is a genuine difference between
                    # eland and pandas and must fail the test.
                    raise

            if isinstance(ed_obj, (ed.DataFrame, ed.Series)):
                return SymmetricAPIChecker(ed_obj, pd_obj)
            return pd_obj

        return f

    def check_values(self, ed_obj, pd_obj, **kwargs):
        """Checks that any two values coming from eland and pandas are equal

        The comparison is chosen from the type of ``ed_obj``; ``kwargs`` are
        forwarded to the frame/series asserters only.
        """
        if isinstance(ed_obj, ed.DataFrame):
            assert_pandas_eland_frame_equal(pd_obj, ed_obj, **kwargs)
        elif isinstance(ed_obj, ed.Series):
            assert_pandas_eland_series_equal(pd_obj, ed_obj, **kwargs)
        elif isinstance(ed_obj, pd.DataFrame):
            assert_frame_equal(ed_obj, pd_obj, **kwargs)
        elif isinstance(ed_obj, pd.Series):
            assert_series_equal(ed_obj, pd_obj, **kwargs)
        elif isinstance(ed_obj, pd.Index):
            assert ed_obj.equals(pd_obj)
        else:
            assert ed_obj == pd_obj

    def check_exception(self, ed_exc, pd_exc):
        """Checks that either an exception was raised or not from both eland and pandas

        Raises:
            AssertionError: if only one side raised, or if the eland exception
                is not an instance of the pandas exception's type.
            Exception: ``pd_exc`` itself is re-raised when both sides raised
                compatible exceptions.
        """
        assert (ed_exc is None) == (pd_exc is None) and isinstance(
            ed_exc, type(pd_exc)
        ), f"eland raised {ed_exc!r} but pandas raised {pd_exc!r}"
        if pd_exc is not None:
            raise pd_exc

    def __getitem__(self, item):
        """Index both objects with ``item`` and return the (wrapped) result.

        When ``item`` is itself a ``SymmetricAPIChecker`` each side is indexed
        with its own half of the pair. Only exception symmetry is checked
        here; the resulting values are not compared.
        """
        if isinstance(item, SymmetricAPIChecker):
            pd_item = item.pd
            ed_item = item.ed
        else:
            pd_item = ed_item = item

        ed_exc = None
        pd_exc = None
        try:
            pd_obj = self.pd[pd_item]
        except Exception as e:
            pd_exc = e
        try:
            ed_obj = self.ed[ed_item]
        except Exception as e:
            ed_exc = e

        self.check_exception(ed_exc, pd_exc)
        if isinstance(ed_obj, (ed.DataFrame, ed.Series)):
            return SymmetricAPIChecker(ed_obj, pd_obj)
        return pd_obj

    def __getattr__(self, item):
        """Resolve ``item`` on both objects.

        Functions and methods are returned as a checking wrapper (see
        ``return_value_checker``); any other attribute is read from both
        sides and compared immediately.
        """
        # __getattr__ only runs when normal lookup fails. If "ed"/"pd" get
        # here the instance was never initialised, and touching self.pd below
        # would recurse without end.
        if item in ("ed", "pd"):
            raise AttributeError(item)

        if item == "to_pandas":
            return self.return_value_checker("to_pandas")

        pd_obj = getattr(self.pd, item)
        if inspect.isfunction(pd_obj) or inspect.ismethod(pd_obj):
            return self.return_value_checker(item)
        ed_obj = getattr(self.ed, item)

        self.check_values(ed_obj, pd_obj)

        if isinstance(ed_obj, (ed.DataFrame, ed.Series)):
            return SymmetricAPIChecker(ed_obj, pd_obj)
        return pd_obj
|
|
|
|
|
|
@pytest.fixture(scope="function")
def df():
    """Function-scoped checker pairing the small flights eland and pandas frames.

    The pandas frame is copied so each test receives its own object.
    """
    pandas_side = _pd_flights_small.copy()
    return SymmetricAPIChecker(ed_obj=_ed_flights_small, pd_obj=pandas_side)
|
|
|
|
|
|
@pytest.fixture(scope="session")
def testdata():
    """Session-scoped fixture returning a ``TestData`` instance."""
    shared_data = TestData()
    return shared_data
|