Improve coverage for eland.dataframe

This commit is contained in:
P. Sai Vinay 2021-09-29 01:41:57 +05:30 committed by GitHub
parent b8e192b7d0
commit bc201e22dd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 106 additions and 40 deletions

View File

@ -15,17 +15,24 @@
# specific language governing permissions and limitations
# under the License.
import pytest
# File called _pytest for PyCharm compatibility
from tests.common import TestData
class TestDataFrameDrop:
class TestDataFrameDrop(TestData):
def test_drop(self, df):
    """Smoke-test ``df.drop`` across its accepted argument spellings.

    No assertions are made here; the test only passes if none of the calls
    raise. ``df`` is presumably a pytest fixture supplied elsewhere
    (NOTE(review): confirm where it is defined).
    """
    # Two column names, passed positionally, via ``labels=`` (both with
    # axis=1) and via ``columns=``.
    df.drop(["Carrier", "DestCityName"], axis=1)
    df.drop(labels=["Carrier", "DestCityName"], axis=1)
    df.drop(columns=["Carrier", "DestCityName"])
    # Labels "1" and "2": positional with the default axis, with explicit
    # axis=0 (positional and ``labels=``), and via ``index=``.
    df.drop(["1", "2"])
    df.drop(["1", "2"], axis=0)
    df.drop(labels=["1", "2"], axis=0)
    df.drop(index=["1", "2"])
    # A single string label instead of a list.
    df.drop(labels="3", axis=0)
    df.drop(columns="Carrier")
    # errors="ignore" together with "Carrier_1", which is presumably not a
    # column of the test data (TODO confirm), so these calls must not raise.
    df.drop(columns=["Carrier", "Carrier_1"], errors="ignore")
    df.drop(columns=["Carrier_1"], errors="ignore")
def test_drop_all_columns(self, df):
all_columns = list(df.columns)
@ -50,3 +57,34 @@ class TestDataFrameDrop:
):
assert dropped.shape == (0, cols)
assert list(dropped.to_pandas().index) == []
def test_drop_raises(self):
    """Check that invalid ``drop`` argument combinations raise ``ValueError``.

    Covers: ``labels`` combined with ``columns`` or ``index``, no selector
    at all, and labels that are not found while ``errors="raise"``.
    """
    ed_flights = self.ed_flights()

    # ``labels`` cannot be combined with ``columns`` ...
    with pytest.raises(
        ValueError, match="Cannot specify both 'labels' and 'index'/'columns'"
    ):
        ed_flights.drop(
            labels=["Carrier", "DestCityName"], columns=["Carrier", "DestCityName"]
        )

    # ... nor with ``index``.
    with pytest.raises(
        ValueError, match="Cannot specify both 'labels' and 'index'/'columns'"
    ):
        ed_flights.drop(labels=["Carrier", "DestCityName"], index=[0, 1, 2])

    # Calling drop() with no selector at all is rejected.
    with pytest.raises(
        ValueError,
        match="Need to specify at least one of 'labels', 'index' or 'columns'",
    ):
        ed_flights.drop()

    # Index labels that cannot be found, with errors="raise".
    with pytest.raises(
        ValueError,
        match="number of labels 0!=2 not contained in axis",
    ):
        ed_flights.drop(errors="raise", axis=0, labels=["-1", "-2"])

    # The expected message contains "[...]", which ``match=`` would treat as
    # a regex character class, so the message is compared as a plain string.
    with pytest.raises(ValueError) as error:
        ed_flights.drop(columns=["Carrier_1"], errors="raise")
    # This assertion must stay OUTSIDE the ``with`` block: the call above
    # raises, so any statement placed after it inside the block never runs.
    assert str(error.value) == "labels ['Carrier_1'] not contained in axis"

View File

@ -17,14 +17,17 @@
# File called _pytest for PyCharm compatibility
import pytest
from tests.common import TestData
class TestEsMatch(TestData):
def test_match(self):
@pytest.mark.parametrize("columns", [None, ["category"], "category"])
def test_match(self, columns):
df = self.ed_ecommerce()
categories = list(df.es_match("Men's").category.to_pandas())
categories = list(df.es_match("Men's", columns=columns).category.to_pandas())
assert len(categories) > 0
assert all(any("Men's" in y for y in x) for x in categories)
@ -39,3 +42,9 @@ class TestEsMatch(TestData):
assert len(categories) > 0
assert all(all("Men's" not in y for y in x) for x in categories)
assert all(any("Women's" in y for y in x) for x in categories)
def test_match_raises(self):
    """``es_match`` called with an empty ``columns`` list raises ``ValueError``."""
    ed_ecommerce = self.ed_ecommerce()
    expected_message = "columns can't be empty"
    with pytest.raises(ValueError, match=expected_message):
        ed_ecommerce.es_match("Men's", columns=[])

View File

@ -223,23 +223,32 @@ class TestDataFrameRepr(TestData):
assert pd.get_option("display.max_rows") == 60
show_dimensions = pd.get_option("display.show_dimensions")
try:
# TODO - there is a bug in 'show_dimensions' as it gets added after the last </div>
# For now test without this
pd.set_option("display.show_dimensions", False)
# TODO - there is a bug in 'show_dimensions' as it gets added after the last </div>
# For now test without this
pd.set_option("display.show_dimensions", False)
# Test eland.DataFrame.to_string vs pandas.DataFrame.to_string
# In pandas calling 'to_string' without max_rows set, will dump ALL rows
# Test eland.DataFrame.to_string vs pandas.DataFrame.to_string
# In pandas calling 'to_string' without max_rows set, will dump ALL rows
# Test n-1, n, n+1 for edge cases
self.num_rows_repr_html(pd.get_option("display.max_rows") - 1)
self.num_rows_repr_html(pd.get_option("display.max_rows"))
self.num_rows_repr_html(
pd.get_option("display.max_rows") + 1, pd.get_option("display.max_rows")
)
finally:
# Restore default
pd.set_option("display.show_dimensions", show_dimensions)
# Test n-1, n, n+1 for edge cases
self.num_rows_repr_html(pd.get_option("display.max_rows") - 1)
self.num_rows_repr_html(pd.get_option("display.max_rows"))
self.num_rows_repr_html(
pd.get_option("display.max_rows") + 1, pd.get_option("display.max_rows")
)
# Restore default
pd.set_option("display.show_dimensions", show_dimensions)
def test_num_rows_repr_html_display_none(self):
    """Run the HTML repr comparison with ``display.notebook_repr_html`` off.

    The option is switched to ``False`` for the duration of the check and
    put back to its previous value afterwards, even if the check fails.
    """
    option_name = "display.notebook_repr_html"
    previous_value = pd.get_option(option_name)
    try:
        pd.set_option(option_name, False)
        row_count = pd.get_option("display.max_rows")
        self.num_rows_repr_html(row_count)
    finally:
        # Restore default
        pd.set_option(option_name, previous_value)
def num_rows_repr_html(self, rows, max_rows=None):
ed_flights = self.ed_flights()
@ -251,34 +260,34 @@ class TestDataFrameRepr(TestData):
ed_head_str = ed_head._repr_html_()
pd_head_str = pd_head._repr_html_()
# print(ed_head_str)
# print(pd_head_str)
assert pd_head_str == ed_head_str
def test_empty_dataframe_repr_html(self):
# TODO - there is a bug in 'show_dimensions' as it gets added after the last </div>
# For now test without this
show_dimensions = pd.get_option("display.show_dimensions")
pd.set_option("display.show_dimensions", False)
try:
pd.set_option("display.show_dimensions", False)
ed_ecom = self.ed_ecommerce()
pd_ecom = self.pd_ecommerce()
ed_ecom = self.ed_ecommerce()
pd_ecom = self.pd_ecommerce()
ed_ecom_rh = ed_ecom[ed_ecom["currency"] == "USD"]._repr_html_()
pd_ecom_rh = pd_ecom[pd_ecom["currency"] == "USD"]._repr_html_()
ed_ecom_rh = ed_ecom[ed_ecom["currency"] == "USD"]._repr_html_()
pd_ecom_rh = pd_ecom[pd_ecom["currency"] == "USD"]._repr_html_()
# Restore default
pd.set_option("display.show_dimensions", show_dimensions)
assert ed_ecom_rh == pd_ecom_rh
assert ed_ecom_rh == pd_ecom_rh
finally:
# Restore default
pd.set_option("display.show_dimensions", show_dimensions)
def test_dataframe_repr_pd_get_option_none(self):
show_dimensions = pd.get_option("display.show_dimensions")
show_rows = pd.get_option("display.max_rows")
expand_frame = pd.get_option("display.expand_frame_repr")
try:
pd.set_option("display.show_dimensions", False)
pd.set_option("display.max_rows", None)
pd.set_option("display.expand_frame_repr", False)
columns = [
"AvgTicketPrice",
@ -296,3 +305,4 @@ class TestDataFrameRepr(TestData):
# Restore default
pd.set_option("display.max_rows", show_rows)
pd.set_option("display.show_dimensions", show_dimensions)
pd.set_option("display.expand_frame_repr", expand_frame)

View File

@ -41,17 +41,26 @@ class TestDataFrameSample(TestData):
eland_to_pandas(first_sample), eland_to_pandas(second_sample)
)
def test_sample_raises(self):
@pytest.mark.parametrize(
["opts", "message"],
[
(
{"n": 10, "frac": 0.1},
"Please enter a value for `frac` OR `n`, not both",
),
({"frac": 1.5}, "`frac` must be between 0. and 1."),
(
{"n": -1},
"A negative number of rows requested. Please provide positive value.",
),
({"n": 1.5}, "Only integers accepted as `n` values"),
],
)
def test_sample_raises(self, opts, message):
ed_flights_small = self.ed_flights_small()
with pytest.raises(ValueError):
ed_flights_small.sample(n=10, frac=0.1)
with pytest.raises(ValueError):
ed_flights_small.sample(frac=1.5)
with pytest.raises(ValueError):
ed_flights_small.sample(n=-1)
with pytest.raises(ValueError, match=message):
ed_flights_small.sample(**opts)
def test_sample_basic(self):
ed_flights_small = self.ed_flights_small()