eland/tests/dataframe/test_head_tail_pytest.py
Bart Broere 75c57b0775
Support Pandas 2 (#742)
* Fix test setup to match pandas 2.0 demands

* Use the now deprecated _append method

(Better solution might exist)

* Deal with numeric_only being removed in metrics test

* Skip mad metric for other pandas versions

* Account for differences between pandas versions in describe methods

* Run black

* Check Pandas version first

* Mirror behaviour of installed Pandas version when running value_counts

* Allow passing arguments to the individual asserters

* Fix for method _construct_axes_from_arguments no longer existing

* Skip mad metric if it does not exist

* Account for pandas 2.0 timestamp default behaviour

* Deal with empty vs other inferred data types

* Account for default datetime precision change

* Run Black

* Solution for differences in inferred_type only

* Fix csv and json issues

* Skip two doctests

* Passing a set as indexer is no longer allowed

* Don't validate output where it differs between Pandas versions in the environment

* Update test matrix and packaging metadata

* Update version of Python in the docs

* Update Python version in demo notebook

* Match noxfile

* Symmetry

* Fix trailing comma in JSON

* Revert some changes in setup.py to fix building the documentation

* Revert "Revert some changes in setup.py to fix building the documentation"

This reverts commit ea9879753129d8d8390b3cbbce57155a8b4fb346.

* Use PANDAS_VERSION from eland.common

* Still skip the doctest, but make the output pandas 2 instead of 1

* Still skip doctest, but switch to pandas 2 output

* Prepare for pandas 3

* Reference the right column

* Ignore output in tests but switch to pandas 2 output

* Add line comment about NBVAL_IGNORE_OUTPUT

* Restore missing line and add stderr cell

* Use non-private method instead

* Fix indentation and parameter issues

* If index is not specified, and pandas 1 is present, set it to True

From pandas 2 and upwards, index is set to None by default

* Run black

* Newer version of black might have different opinions?

* Add line comment

* Remove unused import

* Add reason for ignore statement

* Add reason for skip

---------

Co-authored-by: Quentin Pradet <quentin.pradet@elastic.co>
2025-02-04 17:43:43 +04:00

121 lines
4.2 KiB
Python

# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# File called _pytest for PyCharm compatibility
from tests.common import TestData, assert_pandas_eland_frame_equal
class TestDataFrameHeadTail(TestData):
    """Exercise ``head``/``tail`` on the flights frames provided by ``TestData``.

    Most tests slice the frame from ``self.ed_flights()`` and the frame from
    ``self.pd_flights()`` in the same way and compare the two results with
    ``assert_pandas_eland_frame_equal``. The ``test_doc_test_*`` tests only
    use the eland-side frame.
    """

    def test_head(self):
        """Compare ``head(10)``, then ``head(8)`` and ``head(20)`` of that result."""
        ed_df = self.ed_flights()
        pd_df = self.pd_flights()

        ed_first_ten = ed_df.head(10)
        pd_first_ten = pd_df.head(10)
        assert_pandas_eland_frame_equal(pd_first_ten, ed_first_ten)

        # 8 asks for fewer rows than the head(10) above, 20 asks for more.
        for n_rows in (8, 20):
            ed_subset = ed_first_ten.head(n_rows)
            pd_subset = pd_first_ten.head(n_rows)
            assert_pandas_eland_frame_equal(pd_subset, ed_subset)

    def test_tail(self):
        """Compare ``tail(10)``, then ``tail(8)`` and ``tail(20)`` of that result."""
        ed_df = self.ed_flights()
        pd_df = self.pd_flights()

        ed_last_ten = ed_df.tail(10)
        pd_last_ten = pd_df.tail(10)
        assert_pandas_eland_frame_equal(pd_last_ten, ed_last_ten)

        # 8 asks for fewer rows than the tail(10) above, 20 asks for more.
        for n_rows in (8, 20):
            ed_subset = ed_last_ten.tail(n_rows)
            pd_subset = pd_last_ten.tail(n_rows)
            assert_pandas_eland_frame_equal(pd_subset, ed_subset)

    def test_head_tail(self):
        """Compare ``head(10)`` followed by successively smaller ``tail`` calls."""
        ed_df = self.ed_flights()
        pd_df = self.pd_flights()

        ed_current = ed_df.head(10)
        pd_current = pd_df.head(10)
        assert_pandas_eland_frame_equal(pd_current, ed_current)

        # Each tail is taken from the previous step's result, not the full frame.
        for n_rows in (8, 5, 4):
            ed_current = ed_current.tail(n_rows)
            pd_current = pd_current.tail(n_rows)
            assert_pandas_eland_frame_equal(pd_current, ed_current)

    def test_tail_head(self):
        """Compare ``tail(10)`` followed by alternating ``head``/``tail`` calls."""
        ed_df = self.ed_flights()
        pd_df = self.pd_flights()

        ed_current = ed_df.tail(10)
        pd_current = pd_df.tail(10)
        assert_pandas_eland_frame_equal(pd_current, ed_current)

        # (method name, row count) applied in order to the previous result.
        steps = (("head", 8), ("tail", 5), ("head", 4))
        for method_name, n_rows in steps:
            ed_current = getattr(ed_current, method_name)(n_rows)
            pd_current = getattr(pd_current, method_name)(n_rows)
            assert_pandas_eland_frame_equal(pd_current, ed_current)

    def test_head_0(self):
        """Compare ``head(0)`` on both frames, without checking the index type."""
        ed_df = self.ed_flights()
        pd_df = self.pd_flights()

        ed_empty = ed_df.head(0)
        pd_empty = pd_df.head(0)

        # NOTE(review): index type check is disabled here, presumably because
        # the index type of an empty frame differs between the two -- confirm.
        assert_pandas_eland_frame_equal(pd_empty, ed_empty, check_index_type=False)

    def test_doc_test_tail(self):
        """Print ``tail()`` of a filtered, four-column selection.

        There is no assertion; the test passes as long as nothing raises.
        """
        flights = self.ed_flights()

        row_filter = (flights.OriginAirportID == "AMS") & (flights.FlightDelayMin > 60)
        selection = flights[row_filter]
        selection = selection[
            ["timestamp", "OriginAirportID", "DestAirportID", "FlightDelayMin"]
        ]

        last_rows = selection.tail()
        print(last_rows)

    def test_doc_test_tail_empty(self):
        """``tail()`` of a filter expected to match nothing has zero rows."""
        flights = self.ed_flights()

        no_match = flights[flights.OriginAirportID == "NADA"]
        last_rows = no_match.tail()

        assert last_rows.shape[0] == 0

    def test_doc_test_tail_single(self):
        """``tail()`` of a filter on the small flights frame has exactly one row."""
        flights = self.ed_flights_small()

        row_filter = (flights.Carrier == "Kibana Airlines") & (
            flights.DestAirportID == "ITM"
        )
        last_rows = flights[row_filter].tail()

        assert last_rows.shape[0] == 1