mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Improve coverage for eland.dataframe
This commit is contained in:
parent
b8e192b7d0
commit
bc201e22dd
@ -15,17 +15,24 @@
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import pytest
|
||||
|
||||
# File called _pytest for PyCharm compatibility
|
||||
from tests.common import TestData
|
||||
|
||||
|
||||
class TestDataFrameDrop:
|
||||
class TestDataFrameDrop(TestData):
|
||||
def test_drop(self, df):
|
||||
df.drop(["Carrier", "DestCityName"], axis=1)
|
||||
df.drop(labels=["Carrier", "DestCityName"], axis=1)
|
||||
df.drop(columns=["Carrier", "DestCityName"])
|
||||
|
||||
df.drop(["1", "2"])
|
||||
df.drop(["1", "2"], axis=0)
|
||||
df.drop(labels=["1", "2"], axis=0)
|
||||
df.drop(index=["1", "2"])
|
||||
df.drop(labels="3", axis=0)
|
||||
df.drop(columns="Carrier")
|
||||
df.drop(columns=["Carrier", "Carrier_1"], errors="ignore")
|
||||
df.drop(columns=["Carrier_1"], errors="ignore")
|
||||
|
||||
def test_drop_all_columns(self, df):
|
||||
all_columns = list(df.columns)
|
||||
@ -50,3 +57,34 @@ class TestDataFrameDrop:
|
||||
):
|
||||
assert dropped.shape == (0, cols)
|
||||
assert list(dropped.to_pandas().index) == []
|
||||
|
||||
def test_drop_raises(self):
|
||||
ed_flights = self.ed_flights()
|
||||
|
||||
with pytest.raises(
|
||||
ValueError, match="Cannot specify both 'labels' and 'index'/'columns'"
|
||||
):
|
||||
ed_flights.drop(
|
||||
labels=["Carrier", "DestCityName"], columns=["Carrier", "DestCityName"]
|
||||
)
|
||||
|
||||
with pytest.raises(
|
||||
ValueError, match="Cannot specify both 'labels' and 'index'/'columns'"
|
||||
):
|
||||
ed_flights.drop(labels=["Carrier", "DestCityName"], index=[0, 1, 2])
|
||||
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match="Need to specify at least one of 'labels', 'index' or 'columns'",
|
||||
):
|
||||
ed_flights.drop()
|
||||
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match="number of labels 0!=2 not contained in axis",
|
||||
):
|
||||
ed_flights.drop(errors="raise", axis=0, labels=["-1", "-2"])
|
||||
|
||||
with pytest.raises(ValueError) as error:
|
||||
ed_flights.drop(columns=["Carrier_1"], errors="raise")
|
||||
assert str(error.value) == "labels ['Carrier_1'] not contained in axis"
|
||||
|
@ -17,14 +17,17 @@
|
||||
|
||||
# File called _pytest for PyCharm compatibility
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.common import TestData
|
||||
|
||||
|
||||
class TestEsMatch(TestData):
|
||||
def test_match(self):
|
||||
@pytest.mark.parametrize("columns", [None, ["category"], "category"])
|
||||
def test_match(self, columns):
|
||||
df = self.ed_ecommerce()
|
||||
|
||||
categories = list(df.es_match("Men's").category.to_pandas())
|
||||
categories = list(df.es_match("Men's", columns=columns).category.to_pandas())
|
||||
assert len(categories) > 0
|
||||
assert all(any("Men's" in y for y in x) for x in categories)
|
||||
|
||||
@ -39,3 +42,9 @@ class TestEsMatch(TestData):
|
||||
assert len(categories) > 0
|
||||
assert all(all("Men's" not in y for y in x) for x in categories)
|
||||
assert all(any("Women's" in y for y in x) for x in categories)
|
||||
|
||||
def test_match_raises(self):
|
||||
df = self.ed_ecommerce()
|
||||
|
||||
with pytest.raises(ValueError, match="columns can't be empty"):
|
||||
df.es_match("Men's", columns=[])
|
||||
|
@ -223,7 +223,7 @@ class TestDataFrameRepr(TestData):
|
||||
assert pd.get_option("display.max_rows") == 60
|
||||
|
||||
show_dimensions = pd.get_option("display.show_dimensions")
|
||||
|
||||
try:
|
||||
# TODO - there is a bug in 'show_dimensions' as it gets added after the last </div>
|
||||
# For now test without this
|
||||
pd.set_option("display.show_dimensions", False)
|
||||
@ -237,10 +237,19 @@ class TestDataFrameRepr(TestData):
|
||||
self.num_rows_repr_html(
|
||||
pd.get_option("display.max_rows") + 1, pd.get_option("display.max_rows")
|
||||
)
|
||||
|
||||
finally:
|
||||
# Restore default
|
||||
pd.set_option("display.show_dimensions", show_dimensions)
|
||||
|
||||
def test_num_rows_repr_html_display_none(self):
|
||||
display = pd.get_option("display.notebook_repr_html")
|
||||
try:
|
||||
pd.set_option("display.notebook_repr_html", False)
|
||||
self.num_rows_repr_html(pd.get_option("display.max_rows"))
|
||||
finally:
|
||||
# Restore default
|
||||
pd.set_option("display.notebook_repr_html", display)
|
||||
|
||||
def num_rows_repr_html(self, rows, max_rows=None):
|
||||
ed_flights = self.ed_flights()
|
||||
pd_flights = self.pd_flights()
|
||||
@ -251,15 +260,13 @@ class TestDataFrameRepr(TestData):
|
||||
ed_head_str = ed_head._repr_html_()
|
||||
pd_head_str = pd_head._repr_html_()
|
||||
|
||||
# print(ed_head_str)
|
||||
# print(pd_head_str)
|
||||
|
||||
assert pd_head_str == ed_head_str
|
||||
|
||||
def test_empty_dataframe_repr_html(self):
|
||||
# TODO - there is a bug in 'show_dimensions' as it gets added after the last </div>
|
||||
# For now test without this
|
||||
show_dimensions = pd.get_option("display.show_dimensions")
|
||||
try:
|
||||
pd.set_option("display.show_dimensions", False)
|
||||
|
||||
ed_ecom = self.ed_ecommerce()
|
||||
@ -268,17 +275,19 @@ class TestDataFrameRepr(TestData):
|
||||
ed_ecom_rh = ed_ecom[ed_ecom["currency"] == "USD"]._repr_html_()
|
||||
pd_ecom_rh = pd_ecom[pd_ecom["currency"] == "USD"]._repr_html_()
|
||||
|
||||
assert ed_ecom_rh == pd_ecom_rh
|
||||
finally:
|
||||
# Restore default
|
||||
pd.set_option("display.show_dimensions", show_dimensions)
|
||||
|
||||
assert ed_ecom_rh == pd_ecom_rh
|
||||
|
||||
def test_dataframe_repr_pd_get_option_none(self):
|
||||
show_dimensions = pd.get_option("display.show_dimensions")
|
||||
show_rows = pd.get_option("display.max_rows")
|
||||
expand_frame = pd.get_option("display.expand_frame_repr")
|
||||
try:
|
||||
pd.set_option("display.show_dimensions", False)
|
||||
pd.set_option("display.max_rows", None)
|
||||
pd.set_option("display.expand_frame_repr", False)
|
||||
|
||||
columns = [
|
||||
"AvgTicketPrice",
|
||||
@ -296,3 +305,4 @@ class TestDataFrameRepr(TestData):
|
||||
# Restore default
|
||||
pd.set_option("display.max_rows", show_rows)
|
||||
pd.set_option("display.show_dimensions", show_dimensions)
|
||||
pd.set_option("display.expand_frame_repr", expand_frame)
|
||||
|
@ -41,17 +41,26 @@ class TestDataFrameSample(TestData):
|
||||
eland_to_pandas(first_sample), eland_to_pandas(second_sample)
|
||||
)
|
||||
|
||||
def test_sample_raises(self):
|
||||
@pytest.mark.parametrize(
|
||||
["opts", "message"],
|
||||
[
|
||||
(
|
||||
{"n": 10, "frac": 0.1},
|
||||
"Please enter a value for `frac` OR `n`, not both",
|
||||
),
|
||||
({"frac": 1.5}, "`frac` must be between 0. and 1."),
|
||||
(
|
||||
{"n": -1},
|
||||
"A negative number of rows requested. Please provide positive value.",
|
||||
),
|
||||
({"n": 1.5}, "Only integers accepted as `n` values"),
|
||||
],
|
||||
)
|
||||
def test_sample_raises(self, opts, message):
|
||||
ed_flights_small = self.ed_flights_small()
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
ed_flights_small.sample(n=10, frac=0.1)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
ed_flights_small.sample(frac=1.5)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
ed_flights_small.sample(n=-1)
|
||||
with pytest.raises(ValueError, match=message):
|
||||
ed_flights_small.sample(**opts)
|
||||
|
||||
def test_sample_basic(self):
|
||||
ed_flights_small = self.ed_flights_small()
|
||||
|
Loading…
x
Reference in New Issue
Block a user