Improve coverage for eland.dataframe

2025-07-11 00:02:14 +08:00 · 2021-09-29 01:41:57 +05:30 · 2021-09-29 01:41:57 +05:30 · bc201e22dd
commit bc201e22dd
parent b8e192b7d0
4 changed files with 106 additions and 40 deletions
--- a/tests/dataframe/test_drop_pytest.py
+++ b/tests/dataframe/test_drop_pytest.py
@ -15,17 +15,24 @@
 #  specific language governing permissions and limitations
 #  under the License.

+import pytest
+
 # File called _pytest for PyCharm compatability
+from tests.common import TestData


-class TestDataFrameDrop:
+class TestDataFrameDrop(TestData):
    def test_drop(self, df):
-        df.drop(["Carrier", "DestCityName"], axis=1)
+        df.drop(labels=["Carrier", "DestCityName"], axis=1)
        df.drop(columns=["Carrier", "DestCityName"])

        df.drop(["1", "2"])
-        df.drop(["1", "2"], axis=0)
+        df.drop(labels=["1", "2"], axis=0)
        df.drop(index=["1", "2"])
+        df.drop(labels="3", axis=0)
+        df.drop(columns="Carrier")
+        df.drop(columns=["Carrier", "Carrier_1"], errors="ignore")
+        df.drop(columns=["Carrier_1"], errors="ignore")

    def test_drop_all_columns(self, df):
        all_columns = list(df.columns)
@ -50,3 +57,34 @@ class TestDataFrameDrop:
        ):
            assert dropped.shape == (0, cols)
            assert list(dropped.to_pandas().index) == []
+
+    def test_drop_raises(self):
+        """Check that invalid ``drop`` arguments raise ``ValueError``.
+
+        Covers ``labels`` combined with ``columns`` or ``index``, a call with
+        no arguments, and unknown index/column labels with ``errors="raise"``.
+        """
+        ed_flights = self.ed_flights()
+
+        with pytest.raises(
+            ValueError, match="Cannot specify both 'labels' and 'index'/'columns'"
+        ):
+            ed_flights.drop(
+                labels=["Carrier", "DestCityName"], columns=["Carrier", "DestCityName"]
+            )
+
+        with pytest.raises(
+            ValueError, match="Cannot specify both 'labels' and 'index'/'columns'"
+        ):
+            ed_flights.drop(labels=["Carrier", "DestCityName"], index=[0, 1, 2])
+
+        with pytest.raises(
+            ValueError,
+            match="Need to specify at least one of 'labels', 'index' or 'columns'",
+        ):
+            ed_flights.drop()
+
+        with pytest.raises(
+            ValueError,
+            match="number of labels 0!=2 not contained in axis",
+        ):
+            ed_flights.drop(errors="raise", axis=0, labels=["-1", "-2"])
+
+        with pytest.raises(ValueError) as error:
+            ed_flights.drop(columns=["Carrier_1"], errors="raise")
+        # Keep this outside the ``with`` block: statements placed after the
+        # raising call inside the block never run, so the check would be skipped.
+        assert str(error.value) == "labels ['Carrier_1'] not contained in axis"
--- a/tests/dataframe/test_es_match_pytest.py
+++ b/tests/dataframe/test_es_match_pytest.py
@ -17,14 +17,17 @@

 # File called _pytest for PyCharm compatability

+import pytest
+
 from tests.common import TestData


 class TestEsMatch(TestData):
-    def test_match(self):
+    @pytest.mark.parametrize("columns", [None, ["category"], "category"])
+    def test_match(self, columns):
        df = self.ed_ecommerce()

-        categories = list(df.es_match("Men's").category.to_pandas())
+        categories = list(df.es_match("Men's", columns=columns).category.to_pandas())
        assert len(categories) > 0
        assert all(any("Men's" in y for y in x) for x in categories)

@ -39,3 +42,9 @@ class TestEsMatch(TestData):
        assert len(categories) > 0
        assert all(all("Men's" not in y for y in x) for x in categories)
        assert all(any("Women's" in y for y in x) for x in categories)
+
+    def test_match_raises(self):
+        """An empty ``columns`` list passed to ``es_match`` raises ``ValueError``."""
+        df = self.ed_ecommerce()
+
+        with pytest.raises(ValueError, match="columns can't be empty"):
+            df.es_match("Men's", columns=[])
--- a/tests/dataframe/test_repr_pytest.py
+++ b/tests/dataframe/test_repr_pytest.py
@ -223,23 +223,32 @@ class TestDataFrameRepr(TestData):
        assert pd.get_option("display.max_rows") == 60

        show_dimensions = pd.get_option("display.show_dimensions")
+        try:
+            # TODO - there is a bug in 'show_dimensions' as it gets added after the last </div>
+            # For now test without this
+            pd.set_option("display.show_dimensions", False)

-        # TODO - there is a bug in 'show_dimensions' as it gets added after the last </div>
-        # For now test without this
-        pd.set_option("display.show_dimensions", False)
+            # Test eland.DataFrame.to_string vs pandas.DataFrame.to_string
+            # In pandas calling 'to_string' without max_rows set, will dump ALL rows

-        # Test eland.DataFrame.to_string vs pandas.DataFrame.to_string
-        # In pandas calling 'to_string' without max_rows set, will dump ALL rows
+            # Test n-1, n, n+1 for edge cases
+            self.num_rows_repr_html(pd.get_option("display.max_rows") - 1)
+            self.num_rows_repr_html(pd.get_option("display.max_rows"))
+            self.num_rows_repr_html(
+                pd.get_option("display.max_rows") + 1, pd.get_option("display.max_rows")
+            )
+        finally:
+            # Restore default
+            pd.set_option("display.show_dimensions", show_dimensions)

-        # Test n-1, n, n+1 for edge cases
-        self.num_rows_repr_html(pd.get_option("display.max_rows") - 1)
-        self.num_rows_repr_html(pd.get_option("display.max_rows"))
-        self.num_rows_repr_html(
-            pd.get_option("display.max_rows") + 1, pd.get_option("display.max_rows")
-        )
-
-        # Restore default
-        pd.set_option("display.show_dimensions", show_dimensions)
+    def test_num_rows_repr_html_display_none(self):
+        """Run the ``_repr_html_`` comparison with notebook HTML repr disabled."""
+        display = pd.get_option("display.notebook_repr_html")
+        try:
+            pd.set_option("display.notebook_repr_html", False)
+            self.num_rows_repr_html(pd.get_option("display.max_rows"))
+        finally:
+            # Restore the option value captured before the test changed it
+            pd.set_option("display.notebook_repr_html", display)

    def num_rows_repr_html(self, rows, max_rows=None):
        ed_flights = self.ed_flights()
@ -251,34 +260,34 @@ class TestDataFrameRepr(TestData):
        ed_head_str = ed_head._repr_html_()
        pd_head_str = pd_head._repr_html_()

-        # print(ed_head_str)
-        # print(pd_head_str)
-
        assert pd_head_str == ed_head_str

    def test_empty_dataframe_repr_html(self):
        # TODO - there is a bug in 'show_dimensions' as it gets added after the last </div>
        # For now test without this
        show_dimensions = pd.get_option("display.show_dimensions")
-        pd.set_option("display.show_dimensions", False)
+        try:
+            pd.set_option("display.show_dimensions", False)

-        ed_ecom = self.ed_ecommerce()
-        pd_ecom = self.pd_ecommerce()
+            ed_ecom = self.ed_ecommerce()
+            pd_ecom = self.pd_ecommerce()

-        ed_ecom_rh = ed_ecom[ed_ecom["currency"] == "USD"]._repr_html_()
-        pd_ecom_rh = pd_ecom[pd_ecom["currency"] == "USD"]._repr_html_()
+            ed_ecom_rh = ed_ecom[ed_ecom["currency"] == "USD"]._repr_html_()
+            pd_ecom_rh = pd_ecom[pd_ecom["currency"] == "USD"]._repr_html_()

-        # Restore default
-        pd.set_option("display.show_dimensions", show_dimensions)
-
-        assert ed_ecom_rh == pd_ecom_rh
+            assert ed_ecom_rh == pd_ecom_rh
+        finally:
+            # Restore default
+            pd.set_option("display.show_dimensions", show_dimensions)

    def test_dataframe_repr_pd_get_option_none(self):
        show_dimensions = pd.get_option("display.show_dimensions")
        show_rows = pd.get_option("display.max_rows")
+        expand_frame = pd.get_option("display.expand_frame_repr")
        try:
            pd.set_option("display.show_dimensions", False)
            pd.set_option("display.max_rows", None)
+            pd.set_option("display.expand_frame_repr", False)

            columns = [
                "AvgTicketPrice",
@ -296,3 +305,4 @@ class TestDataFrameRepr(TestData):
            # Restore default
            pd.set_option("display.max_rows", show_rows)
            pd.set_option("display.show_dimensions", show_dimensions)
+            pd.set_option("display.expand_frame_repr", expand_frame)
--- a/tests/dataframe/test_sample_pytest.py
+++ b/tests/dataframe/test_sample_pytest.py
@ -41,17 +41,26 @@ class TestDataFrameSample(TestData):
            eland_to_pandas(first_sample), eland_to_pandas(second_sample)
        )

-    def test_sample_raises(self):
+    @pytest.mark.parametrize(
+        ["opts", "message"],
+        [
+            (
+                {"n": 10, "frac": 0.1},
+                "Please enter a value for `frac` OR `n`, not both",
+            ),
+            ({"frac": 1.5}, "`frac` must be between 0. and 1."),
+            (
+                {"n": -1},
+                "A negative number of rows requested. Please provide positive value.",
+            ),
+            ({"n": 1.5}, "Only integers accepted as `n` values"),
+        ],
+    )
+    def test_sample_raises(self, opts, message):
        ed_flights_small = self.ed_flights_small()

-        with pytest.raises(ValueError):
-            ed_flights_small.sample(n=10, frac=0.1)
-
-        with pytest.raises(ValueError):
-            ed_flights_small.sample(frac=1.5)
-
-        with pytest.raises(ValueError):
-            ed_flights_small.sample(n=-1)
+        with pytest.raises(ValueError, match=message):
+            ed_flights_small.sample(**opts)

    def test_sample_basic(self):
        ed_flights_small = self.ed_flights_small()