diff --git a/tests/dataframe/test_drop_pytest.py b/tests/dataframe/test_drop_pytest.py index 6800fb4..392e1d1 100644 --- a/tests/dataframe/test_drop_pytest.py +++ b/tests/dataframe/test_drop_pytest.py @@ -15,17 +15,24 @@ # specific language governing permissions and limitations # under the License. +import pytest + # File called _pytest for PyCharm compatability +from tests.common import TestData -class TestDataFrameDrop: +class TestDataFrameDrop(TestData): def test_drop(self, df): - df.drop(["Carrier", "DestCityName"], axis=1) + df.drop(labels=["Carrier", "DestCityName"], axis=1) df.drop(columns=["Carrier", "DestCityName"]) df.drop(["1", "2"]) - df.drop(["1", "2"], axis=0) + df.drop(labels=["1", "2"], axis=0) df.drop(index=["1", "2"]) + df.drop(labels="3", axis=0) + df.drop(columns="Carrier") + df.drop(columns=["Carrier", "Carrier_1"], errors="ignore") + df.drop(columns=["Carrier_1"], errors="ignore") def test_drop_all_columns(self, df): all_columns = list(df.columns) @@ -50,3 +57,34 @@ class TestDataFrameDrop: ): assert dropped.shape == (0, cols) assert list(dropped.to_pandas().index) == [] + + def test_drop_raises(self): + ed_flights = self.ed_flights() + + with pytest.raises( + ValueError, match="Cannot specify both 'labels' and 'index'/'columns'" + ): + ed_flights.drop( + labels=["Carrier", "DestCityName"], columns=["Carrier", "DestCityName"] + ) + + with pytest.raises( + ValueError, match="Cannot specify both 'labels' and 'index'/'columns'" + ): + ed_flights.drop(labels=["Carrier", "DestCityName"], index=[0, 1, 2]) + + with pytest.raises( + ValueError, + match="Need to specify at least one of 'labels', 'index' or 'columns'", + ): + ed_flights.drop() + + with pytest.raises( + ValueError, + match="number of labels 0!=2 not contained in axis", + ): + ed_flights.drop(errors="raise", axis=0, labels=["-1", "-2"]) + + with pytest.raises(ValueError) as error: + ed_flights.drop(columns=["Carrier_1"], errors="raise") + assert str(error.value) == "labels ['Carrier_1'] not contained in axis" diff --git a/tests/dataframe/test_es_match_pytest.py b/tests/dataframe/test_es_match_pytest.py index 8ca038b..f9c2a4c 100644 --- a/tests/dataframe/test_es_match_pytest.py +++ b/tests/dataframe/test_es_match_pytest.py @@ -17,14 +17,17 @@ # File called _pytest for PyCharm compatability +import pytest + from tests.common import TestData class TestEsMatch(TestData): - def test_match(self): + @pytest.mark.parametrize("columns", [None, ["category"], "category"]) + def test_match(self, columns): df = self.ed_ecommerce() - categories = list(df.es_match("Men's").category.to_pandas()) + categories = list(df.es_match("Men's", columns=columns).category.to_pandas()) assert len(categories) > 0 assert all(any("Men's" in y for y in x) for x in categories) @@ -39,3 +42,9 @@ class TestEsMatch(TestData): assert len(categories) > 0 assert all(all("Men's" not in y for y in x) for x in categories) assert all(any("Women's" in y for y in x) for x in categories) + + def test_match_raises(self): + df = self.ed_ecommerce() + + with pytest.raises(ValueError, match="columns can't be empty"): + df.es_match("Men's", columns=[]) diff --git a/tests/dataframe/test_repr_pytest.py b/tests/dataframe/test_repr_pytest.py index 82eb720..49b1f2b 100644 --- a/tests/dataframe/test_repr_pytest.py +++ b/tests/dataframe/test_repr_pytest.py @@ -223,23 +223,32 @@ class TestDataFrameRepr(TestData): assert pd.get_option("display.max_rows") == 60 show_dimensions = pd.get_option("display.show_dimensions") + try: + # TODO - there is a bug in 'show_dimensions' as it gets added after the last + # For now test without this + pd.set_option("display.show_dimensions", False) - # TODO - there is a bug in 'show_dimensions' as it gets added after the last - # For now test without this - pd.set_option("display.show_dimensions", False) + # Test eland.DataFrame.to_string vs pandas.DataFrame.to_string + # In pandas calling 'to_string' without max_rows set, will dump ALL rows - # Test eland.DataFrame.to_string vs pandas.DataFrame.to_string - # In pandas calling 'to_string' without max_rows set, will dump ALL rows + # Test n-1, n, n+1 for edge cases + self.num_rows_repr_html(pd.get_option("display.max_rows") - 1) + self.num_rows_repr_html(pd.get_option("display.max_rows")) + self.num_rows_repr_html( + pd.get_option("display.max_rows") + 1, pd.get_option("display.max_rows") + ) + finally: + # Restore default + pd.set_option("display.show_dimensions", show_dimensions) - # Test n-1, n, n+1 for edge cases - self.num_rows_repr_html(pd.get_option("display.max_rows") - 1) - self.num_rows_repr_html(pd.get_option("display.max_rows")) - self.num_rows_repr_html( - pd.get_option("display.max_rows") + 1, pd.get_option("display.max_rows") - ) - - # Restore default - pd.set_option("display.show_dimensions", show_dimensions) + def test_num_rows_repr_html_display_none(self): + display = pd.get_option("display.notebook_repr_html") + try: + pd.set_option("display.notebook_repr_html", False) + self.num_rows_repr_html(pd.get_option("display.max_rows")) + finally: + # Restore default + pd.set_option("display.notebook_repr_html", display) def num_rows_repr_html(self, rows, max_rows=None): ed_flights = self.ed_flights() @@ -251,34 +260,34 @@ class TestDataFrameRepr(TestData): ed_head_str = ed_head._repr_html_() pd_head_str = pd_head._repr_html_() - # print(ed_head_str) - # print(pd_head_str) - assert pd_head_str == ed_head_str def test_empty_dataframe_repr_html(self): # TODO - there is a bug in 'show_dimensions' as it gets added after the last # For now test without this show_dimensions = pd.get_option("display.show_dimensions") - pd.set_option("display.show_dimensions", False) + try: + pd.set_option("display.show_dimensions", False) - ed_ecom = self.ed_ecommerce() - pd_ecom = self.pd_ecommerce() + ed_ecom = self.ed_ecommerce() + pd_ecom = self.pd_ecommerce() - ed_ecom_rh = ed_ecom[ed_ecom["currency"] == "USD"]._repr_html_() - pd_ecom_rh = pd_ecom[pd_ecom["currency"] == "USD"]._repr_html_() + ed_ecom_rh = ed_ecom[ed_ecom["currency"] == "USD"]._repr_html_() + pd_ecom_rh = pd_ecom[pd_ecom["currency"] == "USD"]._repr_html_() - # Restore default - pd.set_option("display.show_dimensions", show_dimensions) - - assert ed_ecom_rh == pd_ecom_rh + assert ed_ecom_rh == pd_ecom_rh + finally: + # Restore default + pd.set_option("display.show_dimensions", show_dimensions) def test_dataframe_repr_pd_get_option_none(self): show_dimensions = pd.get_option("display.show_dimensions") show_rows = pd.get_option("display.max_rows") + expand_frame = pd.get_option("display.expand_frame_repr") try: pd.set_option("display.show_dimensions", False) pd.set_option("display.max_rows", None) + pd.set_option("display.expand_frame_repr", False) columns = [ "AvgTicketPrice", @@ -296,3 +305,4 @@ class TestDataFrameRepr(TestData): # Restore default pd.set_option("display.max_rows", show_rows) pd.set_option("display.show_dimensions", show_dimensions) + pd.set_option("display.expand_frame_repr", expand_frame) diff --git a/tests/dataframe/test_sample_pytest.py b/tests/dataframe/test_sample_pytest.py index e473010..473d07d 100644 --- a/tests/dataframe/test_sample_pytest.py +++ b/tests/dataframe/test_sample_pytest.py @@ -41,17 +41,26 @@ class TestDataFrameSample(TestData): eland_to_pandas(first_sample), eland_to_pandas(second_sample) ) - def test_sample_raises(self): + @pytest.mark.parametrize( + ["opts", "message"], + [ + ( + {"n": 10, "frac": 0.1}, + "Please enter a value for `frac` OR `n`, not both", + ), + ({"frac": 1.5}, "`frac` must be between 0. and 1."), + ( + {"n": -1}, + "A negative number of rows requested. Please provide positive value.", + ), + ({"n": 1.5}, "Only integers accepted as `n` values"), + ], + ) + def test_sample_raises(self, opts, message): ed_flights_small = self.ed_flights_small() - with pytest.raises(ValueError): - ed_flights_small.sample(n=10, frac=0.1) - - with pytest.raises(ValueError): - ed_flights_small.sample(frac=1.5) - - with pytest.raises(ValueError): - ed_flights_small.sample(n=-1) + with pytest.raises(ValueError, match=message): + ed_flights_small.sample(**opts) def test_sample_basic(self): ed_flights_small = self.ed_flights_small()