diff --git a/eland/ml/ml_model.py b/eland/ml/ml_model.py index ff0e615..29c5b66 100644 --- a/eland/ml/ml_model.py +++ b/eland/ml/ml_model.py @@ -196,7 +196,7 @@ class MLModel: # Return results as np.ndarray of float32 or int (consistent with sklearn/xgboost) if self.model_type == TYPE_CLASSIFICATION: - dt = np.int + dt = np.int_ else: dt = np.float32 return np.asarray(y, dtype=dt) diff --git a/tests/dataframe/test_aggs_pytest.py b/tests/dataframe/test_aggs_pytest.py index 2c37e5f..87ef84e 100644 --- a/tests/dataframe/test_aggs_pytest.py +++ b/tests/dataframe/test_aggs_pytest.py @@ -94,7 +94,7 @@ class TestDataFrameAggs(TestData): # Eland returns all float values for all metric aggs, pandas can return int # TODO - investigate this more pd_aggs = pd_aggs.astype("float64") - assert_frame_equal(pd_aggs, ed_aggs, check_exact=False, check_less_precise=2) + assert_frame_equal(pd_aggs, ed_aggs, check_exact=False, rtol=5e-3) # If Aggregate is given a string then series is returned. @pytest.mark.parametrize("agg", ["mean", "min", "max"]) diff --git a/tests/dataframe/test_describe_pytest.py b/tests/dataframe/test_describe_pytest.py index e86caf4..b7d655c 100644 --- a/tests/dataframe/test_describe_pytest.py +++ b/tests/dataframe/test_describe_pytest.py @@ -34,7 +34,7 @@ class TestDataFrameDescribe(TestData): pd_describe.drop(["25%", "50%", "75%"], axis="index"), ed_describe.drop(["25%", "50%", "75%"], axis="index"), check_exact=False, - check_less_precise=True, + rtol=5e-4, ) # TODO - this fails for percentile fields as ES aggregations are approximate diff --git a/tests/ml/test_ml_model_pytest.py b/tests/ml/test_ml_model_pytest.py index d1be910..e5dd1e6 100644 --- a/tests/ml/test_ml_model_pytest.py +++ b/tests/ml/test_ml_model_pytest.py @@ -263,10 +263,12 @@ class TestMLModel: training_data = datasets.make_classification( n_features=5, n_classes=3, n_informative=3 ) - classifier = XGBClassifier(booster="gbtree", objective="multi:softmax") + classifier = XGBClassifier( + 
booster="gbtree", objective="multi:softmax", use_label_encoder=False + ) else: training_data = datasets.make_classification(n_features=5) - classifier = XGBClassifier(booster="gbtree") + classifier = XGBClassifier(booster="gbtree", use_label_encoder=False) # Train model classifier.fit(training_data[0], training_data[1]) @@ -303,10 +305,14 @@ class TestMLModel: training_data = datasets.make_classification( n_features=5, n_classes=3, n_informative=3 ) - classifier = XGBClassifier(booster=booster, objective=objective) + classifier = XGBClassifier( + booster=booster, objective=objective, use_label_encoder=False + ) else: training_data = datasets.make_classification(n_features=5) - classifier = XGBClassifier(booster=booster, objective=objective) + classifier = XGBClassifier( + booster=booster, objective=objective, use_label_encoder=False + ) # Train model classifier.fit(training_data[0], training_data[1]) diff --git a/tests/series/test_arithmetics_pytest.py b/tests/series/test_arithmetics_pytest.py index 1942a61..9f118e8 100644 --- a/tests/series/test_arithmetics_pytest.py +++ b/tests/series/test_arithmetics_pytest.py @@ -50,7 +50,7 @@ class TestSeriesArithmetics(TestData): + ed_df["total_quantity"] ) - assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True) + assert_pandas_eland_series_equal(pd_series, ed_series, rtol=5e-4) def test_ecommerce_series_simple_integer_addition(self): pd_df = self.pd_ecommerce().head(100) @@ -59,7 +59,7 @@ class TestSeriesArithmetics(TestData): pd_series = pd_df["taxful_total_price"] + 5 ed_series = ed_df["taxful_total_price"] + 5 - assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True) + assert_pandas_eland_series_equal(pd_series, ed_series, rtol=5e-4) def test_ecommerce_series_simple_series_addition(self): pd_df = self.pd_ecommerce().head(100) @@ -68,7 +68,7 @@ class TestSeriesArithmetics(TestData): pd_series = pd_df["taxful_total_price"] + pd_df["total_quantity"] ed_series = 
ed_df["taxful_total_price"] + ed_df["total_quantity"] - assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True) + assert_pandas_eland_series_equal(pd_series, ed_series, rtol=5e-4) def test_ecommerce_series_basic_arithmetics(self): pd_df = self.pd_ecommerce().head(100) @@ -98,27 +98,19 @@ class TestSeriesArithmetics(TestData): ed_series = getattr(ed_df["taxful_total_price"], op)( ed_df["total_quantity"] ) - assert_pandas_eland_series_equal( - pd_series, ed_series, check_less_precise=True - ) + assert_pandas_eland_series_equal(pd_series, ed_series, rtol=5e-4) pd_series = getattr(pd_df["taxful_total_price"], op)(10.56) ed_series = getattr(ed_df["taxful_total_price"], op)(10.56) - assert_pandas_eland_series_equal( - pd_series, ed_series, check_less_precise=True - ) + assert_pandas_eland_series_equal(pd_series, ed_series, rtol=5e-4) pd_series = getattr(pd_df["taxful_total_price"], op)(np.float32(1.879)) ed_series = getattr(ed_df["taxful_total_price"], op)(np.float32(1.879)) - assert_pandas_eland_series_equal( - pd_series, ed_series, check_less_precise=True - ) + assert_pandas_eland_series_equal(pd_series, ed_series, rtol=5e-4) pd_series = getattr(pd_df["taxful_total_price"], op)(int(8)) ed_series = getattr(ed_df["taxful_total_price"], op)(int(8)) - assert_pandas_eland_series_equal( - pd_series, ed_series, check_less_precise=True - ) + assert_pandas_eland_series_equal(pd_series, ed_series, rtol=5e-4) def test_supported_series_dtypes_ops(self): pd_df = self.pd_ecommerce().head(100) @@ -153,9 +145,7 @@ class TestSeriesArithmetics(TestData): ed_series = getattr(ed_df["taxful_total_price"], op)( ed_df["taxless_total_price"] ) - assert_pandas_eland_series_equal( - pd_series, ed_series, check_less_precise=True - ) + assert_pandas_eland_series_equal(pd_series, ed_series, rtol=5e-4) # int op float for op in numeric_ops: @@ -165,9 +155,7 @@ class TestSeriesArithmetics(TestData): ed_series = getattr(ed_df["total_quantity"], op)( ed_df["taxless_total_price"] 
) - assert_pandas_eland_series_equal( - pd_series, ed_series, check_less_precise=True - ) + assert_pandas_eland_series_equal(pd_series, ed_series, rtol=5e-4) # float op int for op in numeric_ops: @@ -177,9 +165,7 @@ class TestSeriesArithmetics(TestData): ed_series = getattr(ed_df["taxful_total_price"], op)( ed_df["total_quantity"] ) - assert_pandas_eland_series_equal( - pd_series, ed_series, check_less_precise=True - ) + assert_pandas_eland_series_equal(pd_series, ed_series, rtol=5e-4) # str op int (throws) for op in non_string_numeric_ops: @@ -227,27 +213,19 @@ class TestSeriesArithmetics(TestData): ed_series = getattr(ed_df["taxful_total_price"], op)( ed_df["total_quantity"] ) - assert_pandas_eland_series_equal( - pd_series, ed_series, check_less_precise=True - ) + assert_pandas_eland_series_equal(pd_series, ed_series, rtol=5e-4) pd_series = getattr(pd_df["taxful_total_price"], op)(3.141) ed_series = getattr(ed_df["taxful_total_price"], op)(3.141) - assert_pandas_eland_series_equal( - pd_series, ed_series, check_less_precise=True - ) + assert_pandas_eland_series_equal(pd_series, ed_series, rtol=5e-4) pd_series = getattr(pd_df["taxful_total_price"], op)(np.float32(2.879)) ed_series = getattr(ed_df["taxful_total_price"], op)(np.float32(2.879)) - assert_pandas_eland_series_equal( - pd_series, ed_series, check_less_precise=True - ) + assert_pandas_eland_series_equal(pd_series, ed_series, rtol=5e-4) pd_series = getattr(pd_df["taxful_total_price"], op)(int(6)) ed_series = getattr(ed_df["taxful_total_price"], op)(int(6)) - assert_pandas_eland_series_equal( - pd_series, ed_series, check_less_precise=True - ) + assert_pandas_eland_series_equal(pd_series, ed_series, rtol=5e-4) def test_supported_series_dtypes_rops(self): pd_df = self.pd_ecommerce().head(100) @@ -282,9 +260,7 @@ class TestSeriesArithmetics(TestData): ed_series = getattr(ed_df["taxful_total_price"], op)( ed_df["taxless_total_price"] ) - assert_pandas_eland_series_equal( - pd_series, ed_series, 
check_less_precise=True - ) + assert_pandas_eland_series_equal(pd_series, ed_series, rtol=5e-4) # int op float for op in numeric_ops: @@ -294,9 +270,7 @@ class TestSeriesArithmetics(TestData): ed_series = getattr(ed_df["total_quantity"], op)( ed_df["taxless_total_price"] ) - assert_pandas_eland_series_equal( - pd_series, ed_series, check_less_precise=True - ) + assert_pandas_eland_series_equal(pd_series, ed_series, rtol=5e-4) # float op int for op in numeric_ops: @@ -306,9 +280,7 @@ class TestSeriesArithmetics(TestData): ed_series = getattr(ed_df["taxful_total_price"], op)( ed_df["total_quantity"] ) - assert_pandas_eland_series_equal( - pd_series, ed_series, check_less_precise=True - ) + assert_pandas_eland_series_equal(pd_series, ed_series, rtol=5e-4) # str op int (throws) for op in non_string_numeric_ops: