Remove deprecated options and aliases

This commit is contained in:
P. Sai Vinay 2021-01-04 20:20:45 +01:00 committed by GitHub
parent f89d79b1b4
commit 27717eead1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 21 additions and 198 deletions

View File

@ -153,7 +153,7 @@ to be serialized and used as an inference model in Elasticsearch
```python ```python
>>> from xgboost import XGBClassifier >>> from xgboost import XGBClassifier
>>> from eland.ml import ImportedMLModel >>> from eland.ml import MLModel
# Train and exercise an XGBoost ML model locally # Train and exercise an XGBoost ML model locally
>>> xgb_model = XGBClassifier(booster="gbtree") >>> xgb_model = XGBClassifier(booster="gbtree")
@ -163,7 +163,7 @@ to be serialized and used as an inference model in Elasticsearch
[0 1 1 0 1 0 0 0 1 0] [0 1 1 0 1 0 0 0 1 0]
# Import the model into Elasticsearch # Import the model into Elasticsearch
>>> es_model = ImportedMLModel( >>> es_model = MLModel(
es_client="localhost:9200", es_client="localhost:9200",
model_id="xgb-classifier", model_id="xgb-classifier",
model=xgb_model, model=xgb_model,

View File

@ -3,8 +3,4 @@ eland.DataFrame.es_info
.. currentmodule:: eland .. currentmodule:: eland
.. warning::
Previously this method was named ``info_es()``.
DataFrame.info_es() is deprecated, use DataFrame.es_info() instead.
.. automethod:: DataFrame.es_info .. automethod:: DataFrame.es_info

View File

@ -3,8 +3,4 @@ eland.Series.es_info
.. currentmodule:: eland .. currentmodule:: eland
.. warning::
Previously this method was named ``info_es()``.
Series.info_es() is deprecated, use Series.es_info() instead.
.. automethod:: Series.es_info .. automethod:: Series.es_info

View File

@ -27,7 +27,7 @@ from ._version import ( # noqa: F401
) )
from .common import SortOrder from .common import SortOrder
from .dataframe import DataFrame from .dataframe import DataFrame
from .etl import csv_to_eland, eland_to_pandas, pandas_to_eland, read_csv, read_es from .etl import csv_to_eland, eland_to_pandas, pandas_to_eland
from .index import Index from .index import Index
from .ndframe import NDFrame from .ndframe import NDFrame
from .series import Series from .series import Series
@ -40,7 +40,5 @@ __all__ = [
"pandas_to_eland", "pandas_to_eland",
"eland_to_pandas", "eland_to_pandas",
"csv_to_eland", "csv_to_eland",
"read_csv",
"read_es",
"SortOrder", "SortOrder",
] ]

View File

@ -39,7 +39,7 @@ from eland.filter import BooleanFilter
from eland.groupby import DataFrameGroupBy from eland.groupby import DataFrameGroupBy
from eland.ndframe import NDFrame from eland.ndframe import NDFrame
from eland.series import Series from eland.series import Series
from eland.utils import deprecated_api, is_valid_attr_name from eland.utils import is_valid_attr_name
class DataFrame(NDFrame): class DataFrame(NDFrame):
@ -628,10 +628,6 @@ class DataFrame(NDFrame):
return buf.getvalue() return buf.getvalue()
@deprecated_api("eland.DataFrame.es_info()")
def info_es(self):
return self.es_info()
def es_match( def es_match(
self, self,
text: str, text: str,

View File

@ -27,37 +27,6 @@ from pandas.io.parsers import _c_parser_defaults # type: ignore
from eland import DataFrame from eland import DataFrame
from eland.common import DEFAULT_CHUNK_SIZE, ensure_es_client from eland.common import DEFAULT_CHUNK_SIZE, ensure_es_client
from eland.field_mappings import FieldMappings, verify_mapping_compatibility from eland.field_mappings import FieldMappings, verify_mapping_compatibility
from eland.utils import deprecated_api
@deprecated_api("eland.DataFrame()")
def read_es(
es_client: Union[str, List[str], Tuple[str, ...], Elasticsearch],
es_index_pattern: str,
) -> DataFrame:
"""
Utility method to create an eland.DataFrame from an Elasticsearch index pattern.
(Similar to pandas.read_csv, but the source data is an Elasticsearch index rather than
a CSV file.)
Parameters
----------
es_client: Elasticsearch client argument(s)
- elasticsearch-py parameters or
- elasticsearch-py instance
es_index_pattern: str
Elasticsearch index pattern
Returns
-------
eland.DataFrame
See Also
--------
eland.pandas_to_eland: Create an eland.DataFrame from a pandas.DataFrame
eland.eland_to_pandas: Create a pandas.DataFrame from an eland.DataFrame
"""
return DataFrame(es_client=es_client, es_index_pattern=es_index_pattern)
def pandas_to_eland( def pandas_to_eland(
@ -552,8 +521,3 @@ def csv_to_eland( # type: ignore
# Now create an eland.DataFrame that references the new index # Now create an eland.DataFrame that references the new index
return DataFrame(es_client, es_index_pattern=es_dest_index) return DataFrame(es_client, es_index_pattern=es_dest_index)
@deprecated_api("eland.csv_to_eland()")
def read_csv(*args, **kwargs) -> DataFrame: # type: ignore
return csv_to_eland(*args, **kwargs)

View File

@ -17,8 +17,6 @@
from typing import TYPE_CHECKING, Optional, TextIO from typing import TYPE_CHECKING, Optional, TextIO
from eland.utils import deprecated_api
if TYPE_CHECKING: if TYPE_CHECKING:
from .query_compiler import QueryCompiler from .query_compiler import QueryCompiler
@ -95,7 +93,3 @@ class Index:
buf.write("Index:\n") buf.write("Index:\n")
buf.write(f" es_index_field: {self.es_index_field}\n") buf.write(f" es_index_field: {self.es_index_field}\n")
buf.write(f" is_source_field: {self.is_source_field}\n") buf.write(f" is_source_field: {self.is_source_field}\n")
@deprecated_api("eland.Index.es_info()")
def info_es(self, buf: TextIO) -> None:
self.es_info(buf)

View File

@ -15,9 +15,6 @@
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
from eland.ml.ml_model import ImportedMLModel, MLModel from eland.ml.ml_model import ImportedMLModel, MLModel # noqa: F401
__all__ = [ __all__ = ["MLModel"]
"MLModel",
"ImportedMLModel",
]

View File

@ -15,7 +15,6 @@
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
import warnings
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast
import elasticsearch import elasticsearch
@ -105,7 +104,7 @@ class MLModel:
-------- --------
>>> from sklearn import datasets >>> from sklearn import datasets
>>> from xgboost import XGBRegressor >>> from xgboost import XGBRegressor
>>> from eland.ml import ImportedMLModel >>> from eland.ml import MLModel
>>> # Train model >>> # Train model
>>> training_data = datasets.make_classification(n_features=6, random_state=0) >>> training_data = datasets.make_classification(n_features=6, random_state=0)
@ -254,7 +253,6 @@ class MLModel:
classification_labels: Optional[List[str]] = None, classification_labels: Optional[List[str]] = None,
classification_weights: Optional[List[float]] = None, classification_weights: Optional[List[float]] = None,
es_if_exists: Optional[str] = None, es_if_exists: Optional[str] = None,
overwrite: Optional[bool] = None,
es_compress_model_definition: bool = True, es_compress_model_definition: bool = True,
) -> "MLModel": ) -> "MLModel":
""" """
@ -318,9 +316,6 @@ class MLModel:
- fail: Raise a ValueError - fail: Raise a ValueError
- replace: Overwrite existing model - replace: Overwrite existing model
overwrite: **DEPRECATED** - bool
Delete and overwrite existing model (if exists)
es_compress_model_definition: bool es_compress_model_definition: bool
If True will use 'compressed_definition' which uses gzipped If True will use 'compressed_definition' which uses gzipped
JSON instead of raw JSON to reduce the amount of data sent JSON instead of raw JSON to reduce the amount of data sent
@ -370,20 +365,7 @@ class MLModel:
serializer = transformer.transform() serializer = transformer.transform()
model_type = transformer.model_type model_type = transformer.model_type
# Verify if both parameters are given if es_if_exists is None:
if overwrite is not None and es_if_exists is not None:
raise ValueError(
"Using 'overwrite' and 'es_if_exists' together is invalid, use only 'es_if_exists'"
)
if overwrite is not None:
warnings.warn(
"'overwrite' parameter is deprecated, use 'es_if_exists' instead",
DeprecationWarning,
stacklevel=2,
)
es_if_exists = "replace" if overwrite else "fail"
elif es_if_exists is None:
es_if_exists = "fail" es_if_exists = "fail"
ml_model = MLModel( ml_model = MLModel(

View File

@ -59,7 +59,7 @@ from eland.filter import (
ScriptFilter, ScriptFilter,
) )
from eland.ndframe import NDFrame from eland.ndframe import NDFrame
from eland.utils import deprecated_api, to_list from eland.utils import to_list
if TYPE_CHECKING: # type: ignore if TYPE_CHECKING: # type: ignore
from elasticsearch import Elasticsearch # noqa: F401 from elasticsearch import Elasticsearch # noqa: F401
@ -712,10 +712,6 @@ class Series(NDFrame):
return buf.getvalue() return buf.getvalue()
@deprecated_api("eland.Series.es_info()")
def info_es(self) -> str:
return self.es_info()
def __add__(self, right): def __add__(self, right):
""" """
Return addition of series and right, element-wise (binary operator add). Return addition of series and right, element-wise (binary operator add).

View File

@ -80,7 +80,7 @@ def check_prediction_equality(es_model, py_model, test_data):
np.testing.assert_almost_equal(test_results, es_results, decimal=2) np.testing.assert_almost_equal(test_results, es_results, decimal=2)
class TestImportedMLModel: class TestMLModel:
@requires_no_ml_extras @requires_no_ml_extras
def test_import_ml_model_when_dependencies_are_not_available(self): def test_import_ml_model_when_dependencies_are_not_available(self):
from eland.ml import MLModel # noqa: F401 from eland.ml import MLModel # noqa: F401
@ -239,6 +239,17 @@ class TestImportedMLModel:
es_model, regressor, random_rows(training_data[0], 20) es_model, regressor, random_rows(training_data[0], 20)
) )
match = f"Trained machine learning model {model_id} already exists"
with pytest.raises(ValueError, match=match):
MLModel.import_model(
ES_TEST_CLIENT,
model_id,
regressor,
feature_names,
es_if_exists="fail",
es_compress_model_definition=compress_model_definition,
)
# Clean up # Clean up
es_model.delete_model() es_model.delete_model()
@ -460,110 +471,3 @@ class TestImportedMLModel:
# Clean up # Clean up
es_model.delete_model() es_model.delete_model()
# Passing both 'overwrite' and 'es_if_exists' must raise a ValueError.
@requires_sklearn
@pytest.mark.parametrize("compress_model_definition", [True, False])
@pytest.mark.parametrize("es_if_exists", ["fail", "replace"])
@pytest.mark.parametrize("overwrite", [True, False])
def test_imported_mlmodel_bothparams(
self, compress_model_definition, es_if_exists, overwrite
):
# Train model
training_data = datasets.make_regression(n_features=5)
regressor = RandomForestRegressor()
regressor.fit(training_data[0], training_data[1])
feature_names = ["f0", "f1", "f2", "f3", "f4"]
model_id = "test_random_forest_regressor"
match = "Using 'overwrite' and 'es_if_exists' together is invalid, use only 'es_if_exists'"
with pytest.raises(ValueError, match=match):
MLModel.import_model(
ES_TEST_CLIENT,
model_id,
regressor,
feature_names,
es_if_exists=es_if_exists,
overwrite=overwrite,
es_compress_model_definition=compress_model_definition,
)
# Deprecation warning for overwrite parameter
@requires_sklearn
@pytest.mark.parametrize("compress_model_definition", [True, False])
@pytest.mark.parametrize("overwrite", [True])
def test_imported_mlmodel_overwrite_true(
self, compress_model_definition, overwrite
):
# Train model
training_data = datasets.make_regression(n_features=5)
regressor = RandomForestRegressor()
regressor.fit(training_data[0], training_data[1])
feature_names = ["f0", "f1", "f2", "f3", "f4"]
model_id = "test_random_forest_regressor"
match = "'overwrite' parameter is deprecated, use 'es_if_exists' instead"
with pytest.warns(DeprecationWarning, match=match):
MLModel.import_model(
ES_TEST_CLIENT,
model_id,
regressor,
feature_names,
overwrite=overwrite,
es_compress_model_definition=compress_model_definition,
)
@requires_sklearn
@pytest.mark.parametrize("compress_model_definition", [True, False])
@pytest.mark.parametrize("overwrite", [False])
def test_imported_mlmodel_overwrite_false(
self, compress_model_definition, overwrite
):
# Train model
training_data = datasets.make_regression(n_features=5)
regressor = RandomForestRegressor()
regressor.fit(training_data[0], training_data[1])
feature_names = ["f0", "f1", "f2", "f3", "f4"]
model_id = "test_random_forest_regressor"
match_error = f"Trained machine learning model {model_id} already exists"
match_warning = (
"'overwrite' parameter is deprecated, use 'es_if_exists' instead"
)
with pytest.raises(ValueError, match=match_error):
with pytest.warns(DeprecationWarning, match=match_warning):
MLModel.import_model(
ES_TEST_CLIENT,
model_id,
regressor,
feature_names,
overwrite=overwrite,
es_compress_model_definition=compress_model_definition,
)
# Raise ValueError if Model exists when es_if_exists = 'fail'
@requires_sklearn
@pytest.mark.parametrize("compress_model_definition", [True, False])
def test_es_if_exists_fail(self, compress_model_definition):
# Train model
training_data = datasets.make_regression(n_features=5)
regressor = RandomForestRegressor()
regressor.fit(training_data[0], training_data[1])
feature_names = ["f0", "f1", "f2", "f3", "f4"]
model_id = "test_random_forest_regressor"
# Expect a ValueError reporting that the model already exists.
match = f"Trained machine learning model {model_id} already exists"
with pytest.raises(ValueError, match=match):
MLModel.import_model(
ES_TEST_CLIENT,
model_id,
regressor,
feature_names,
es_if_exists="fail",
es_compress_model_definition=compress_model_definition,
)