Remove deprecated code in XGBoost and test suite

This commit is contained in:
P. Sai Vinay 2021-06-09 01:49:56 +05:30 committed by GitHub
parent e9c0b897f5
commit 7e8520a8ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 30 additions and 52 deletions

View File

@@ -196,7 +196,7 @@ class MLModel:
# Return results as np.ndarray of float32 or int (consistent with sklearn/xgboost) # Return results as np.ndarray of float32 or int (consistent with sklearn/xgboost)
if self.model_type == TYPE_CLASSIFICATION: if self.model_type == TYPE_CLASSIFICATION:
dt = np.int dt = np.int_
else: else:
dt = np.float32 dt = np.float32
return np.asarray(y, dtype=dt) return np.asarray(y, dtype=dt)

View File

@@ -94,7 +94,7 @@ class TestDataFrameAggs(TestData):
# Eland returns all float values for all metric aggs, pandas can return int # Eland returns all float values for all metric aggs, pandas can return int
# TODO - investigate this more # TODO - investigate this more
pd_aggs = pd_aggs.astype("float64") pd_aggs = pd_aggs.astype("float64")
assert_frame_equal(pd_aggs, ed_aggs, check_exact=False, check_less_precise=2) assert_frame_equal(pd_aggs, ed_aggs, check_exact=False, rtol=2)
# If Aggregate is given a string then series is returned. # If Aggregate is given a string then series is returned.
@pytest.mark.parametrize("agg", ["mean", "min", "max"]) @pytest.mark.parametrize("agg", ["mean", "min", "max"])

View File

@@ -34,7 +34,7 @@ class TestDataFrameDescribe(TestData):
pd_describe.drop(["25%", "50%", "75%"], axis="index"), pd_describe.drop(["25%", "50%", "75%"], axis="index"),
ed_describe.drop(["25%", "50%", "75%"], axis="index"), ed_describe.drop(["25%", "50%", "75%"], axis="index"),
check_exact=False, check_exact=False,
check_less_precise=True, rtol=True,
) )
# TODO - this fails for percentile fields as ES aggregations are approximate # TODO - this fails for percentile fields as ES aggregations are approximate

View File

@@ -263,10 +263,12 @@ class TestMLModel:
training_data = datasets.make_classification( training_data = datasets.make_classification(
n_features=5, n_classes=3, n_informative=3 n_features=5, n_classes=3, n_informative=3
) )
classifier = XGBClassifier(booster="gbtree", objective="multi:softmax") classifier = XGBClassifier(
booster="gbtree", objective="multi:softmax", use_label_encoder=False
)
else: else:
training_data = datasets.make_classification(n_features=5) training_data = datasets.make_classification(n_features=5)
classifier = XGBClassifier(booster="gbtree") classifier = XGBClassifier(booster="gbtree", use_label_encoder=False)
# Train model # Train model
classifier.fit(training_data[0], training_data[1]) classifier.fit(training_data[0], training_data[1])
@@ -303,10 +305,14 @@ class TestMLModel:
training_data = datasets.make_classification( training_data = datasets.make_classification(
n_features=5, n_classes=3, n_informative=3 n_features=5, n_classes=3, n_informative=3
) )
classifier = XGBClassifier(booster=booster, objective=objective) classifier = XGBClassifier(
booster=booster, objective=objective, use_label_encoder=False
)
else: else:
training_data = datasets.make_classification(n_features=5) training_data = datasets.make_classification(n_features=5)
classifier = XGBClassifier(booster=booster, objective=objective) classifier = XGBClassifier(
booster=booster, objective=objective, use_label_encoder=False
)
# Train model # Train model
classifier.fit(training_data[0], training_data[1]) classifier.fit(training_data[0], training_data[1])

View File

@@ -50,7 +50,7 @@ class TestSeriesArithmetics(TestData):
+ ed_df["total_quantity"] + ed_df["total_quantity"]
) )
assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True) assert_pandas_eland_series_equal(pd_series, ed_series, rtol=True)
def test_ecommerce_series_simple_integer_addition(self): def test_ecommerce_series_simple_integer_addition(self):
pd_df = self.pd_ecommerce().head(100) pd_df = self.pd_ecommerce().head(100)
@@ -59,7 +59,7 @@ class TestSeriesArithmetics(TestData):
pd_series = pd_df["taxful_total_price"] + 5 pd_series = pd_df["taxful_total_price"] + 5
ed_series = ed_df["taxful_total_price"] + 5 ed_series = ed_df["taxful_total_price"] + 5
assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True) assert_pandas_eland_series_equal(pd_series, ed_series, rtol=True)
def test_ecommerce_series_simple_series_addition(self): def test_ecommerce_series_simple_series_addition(self):
pd_df = self.pd_ecommerce().head(100) pd_df = self.pd_ecommerce().head(100)
@@ -68,7 +68,7 @@ class TestSeriesArithmetics(TestData):
pd_series = pd_df["taxful_total_price"] + pd_df["total_quantity"] pd_series = pd_df["taxful_total_price"] + pd_df["total_quantity"]
ed_series = ed_df["taxful_total_price"] + ed_df["total_quantity"] ed_series = ed_df["taxful_total_price"] + ed_df["total_quantity"]
assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True) assert_pandas_eland_series_equal(pd_series, ed_series, rtol=True)
def test_ecommerce_series_basic_arithmetics(self): def test_ecommerce_series_basic_arithmetics(self):
pd_df = self.pd_ecommerce().head(100) pd_df = self.pd_ecommerce().head(100)
@@ -98,27 +98,19 @@ class TestSeriesArithmetics(TestData):
ed_series = getattr(ed_df["taxful_total_price"], op)( ed_series = getattr(ed_df["taxful_total_price"], op)(
ed_df["total_quantity"] ed_df["total_quantity"]
) )
assert_pandas_eland_series_equal( assert_pandas_eland_series_equal(pd_series, ed_series, rtol=True)
pd_series, ed_series, check_less_precise=True
)
pd_series = getattr(pd_df["taxful_total_price"], op)(10.56) pd_series = getattr(pd_df["taxful_total_price"], op)(10.56)
ed_series = getattr(ed_df["taxful_total_price"], op)(10.56) ed_series = getattr(ed_df["taxful_total_price"], op)(10.56)
assert_pandas_eland_series_equal( assert_pandas_eland_series_equal(pd_series, ed_series, rtol=True)
pd_series, ed_series, check_less_precise=True
)
pd_series = getattr(pd_df["taxful_total_price"], op)(np.float32(1.879)) pd_series = getattr(pd_df["taxful_total_price"], op)(np.float32(1.879))
ed_series = getattr(ed_df["taxful_total_price"], op)(np.float32(1.879)) ed_series = getattr(ed_df["taxful_total_price"], op)(np.float32(1.879))
assert_pandas_eland_series_equal( assert_pandas_eland_series_equal(pd_series, ed_series, rtol=True)
pd_series, ed_series, check_less_precise=True
)
pd_series = getattr(pd_df["taxful_total_price"], op)(int(8)) pd_series = getattr(pd_df["taxful_total_price"], op)(int(8))
ed_series = getattr(ed_df["taxful_total_price"], op)(int(8)) ed_series = getattr(ed_df["taxful_total_price"], op)(int(8))
assert_pandas_eland_series_equal( assert_pandas_eland_series_equal(pd_series, ed_series, rtol=True)
pd_series, ed_series, check_less_precise=True
)
def test_supported_series_dtypes_ops(self): def test_supported_series_dtypes_ops(self):
pd_df = self.pd_ecommerce().head(100) pd_df = self.pd_ecommerce().head(100)
@@ -153,9 +145,7 @@ class TestSeriesArithmetics(TestData):
ed_series = getattr(ed_df["taxful_total_price"], op)( ed_series = getattr(ed_df["taxful_total_price"], op)(
ed_df["taxless_total_price"] ed_df["taxless_total_price"]
) )
assert_pandas_eland_series_equal( assert_pandas_eland_series_equal(pd_series, ed_series, rtol=True)
pd_series, ed_series, check_less_precise=True
)
# int op float # int op float
for op in numeric_ops: for op in numeric_ops:
@@ -165,9 +155,7 @@ class TestSeriesArithmetics(TestData):
ed_series = getattr(ed_df["total_quantity"], op)( ed_series = getattr(ed_df["total_quantity"], op)(
ed_df["taxless_total_price"] ed_df["taxless_total_price"]
) )
assert_pandas_eland_series_equal( assert_pandas_eland_series_equal(pd_series, ed_series, rtol=True)
pd_series, ed_series, check_less_precise=True
)
# float op int # float op int
for op in numeric_ops: for op in numeric_ops:
@@ -177,9 +165,7 @@ class TestSeriesArithmetics(TestData):
ed_series = getattr(ed_df["taxful_total_price"], op)( ed_series = getattr(ed_df["taxful_total_price"], op)(
ed_df["total_quantity"] ed_df["total_quantity"]
) )
assert_pandas_eland_series_equal( assert_pandas_eland_series_equal(pd_series, ed_series, rtol=True)
pd_series, ed_series, check_less_precise=True
)
# str op int (throws) # str op int (throws)
for op in non_string_numeric_ops: for op in non_string_numeric_ops:
@@ -227,27 +213,19 @@ class TestSeriesArithmetics(TestData):
ed_series = getattr(ed_df["taxful_total_price"], op)( ed_series = getattr(ed_df["taxful_total_price"], op)(
ed_df["total_quantity"] ed_df["total_quantity"]
) )
assert_pandas_eland_series_equal( assert_pandas_eland_series_equal(pd_series, ed_series, rtol=True)
pd_series, ed_series, check_less_precise=True
)
pd_series = getattr(pd_df["taxful_total_price"], op)(3.141) pd_series = getattr(pd_df["taxful_total_price"], op)(3.141)
ed_series = getattr(ed_df["taxful_total_price"], op)(3.141) ed_series = getattr(ed_df["taxful_total_price"], op)(3.141)
assert_pandas_eland_series_equal( assert_pandas_eland_series_equal(pd_series, ed_series, rtol=True)
pd_series, ed_series, check_less_precise=True
)
pd_series = getattr(pd_df["taxful_total_price"], op)(np.float32(2.879)) pd_series = getattr(pd_df["taxful_total_price"], op)(np.float32(2.879))
ed_series = getattr(ed_df["taxful_total_price"], op)(np.float32(2.879)) ed_series = getattr(ed_df["taxful_total_price"], op)(np.float32(2.879))
assert_pandas_eland_series_equal( assert_pandas_eland_series_equal(pd_series, ed_series, rtol=True)
pd_series, ed_series, check_less_precise=True
)
pd_series = getattr(pd_df["taxful_total_price"], op)(int(6)) pd_series = getattr(pd_df["taxful_total_price"], op)(int(6))
ed_series = getattr(ed_df["taxful_total_price"], op)(int(6)) ed_series = getattr(ed_df["taxful_total_price"], op)(int(6))
assert_pandas_eland_series_equal( assert_pandas_eland_series_equal(pd_series, ed_series, rtol=True)
pd_series, ed_series, check_less_precise=True
)
def test_supported_series_dtypes_rops(self): def test_supported_series_dtypes_rops(self):
pd_df = self.pd_ecommerce().head(100) pd_df = self.pd_ecommerce().head(100)
@@ -282,9 +260,7 @@ class TestSeriesArithmetics(TestData):
ed_series = getattr(ed_df["taxful_total_price"], op)( ed_series = getattr(ed_df["taxful_total_price"], op)(
ed_df["taxless_total_price"] ed_df["taxless_total_price"]
) )
assert_pandas_eland_series_equal( assert_pandas_eland_series_equal(pd_series, ed_series, rtol=True)
pd_series, ed_series, check_less_precise=True
)
# int op float # int op float
for op in numeric_ops: for op in numeric_ops:
@@ -294,9 +270,7 @@ class TestSeriesArithmetics(TestData):
ed_series = getattr(ed_df["total_quantity"], op)( ed_series = getattr(ed_df["total_quantity"], op)(
ed_df["taxless_total_price"] ed_df["taxless_total_price"]
) )
assert_pandas_eland_series_equal( assert_pandas_eland_series_equal(pd_series, ed_series, rtol=True)
pd_series, ed_series, check_less_precise=True
)
# float op int # float op int
for op in numeric_ops: for op in numeric_ops:
@@ -306,9 +280,7 @@ class TestSeriesArithmetics(TestData):
ed_series = getattr(ed_df["taxful_total_price"], op)( ed_series = getattr(ed_df["taxful_total_price"], op)(
ed_df["total_quantity"] ed_df["total_quantity"]
) )
assert_pandas_eland_series_equal( assert_pandas_eland_series_equal(pd_series, ed_series, rtol=True)
pd_series, ed_series, check_less_precise=True
)
# str op int (throws) # str op int (throws)
for op in non_string_numeric_ops: for op in non_string_numeric_ops: