diff --git a/eland/ml/pytorch/transformers.py b/eland/ml/pytorch/transformers.py
index 67d4253..22bdf52 100644
--- a/eland/ml/pytorch/transformers.py
+++ b/eland/ml/pytorch/transformers.py
@@ -584,10 +584,10 @@ class TransformerModel:
         self,
         *,
         model_id: str,
-        access_token: Optional[str],
         task_type: str,
         es_version: Optional[Tuple[int, int, int]] = None,
         quantize: bool = False,
+        access_token: Optional[str] = None,
     ):
         """
         Loads a model from the Hugging Face repository or local file and creates
diff --git a/noxfile.py b/noxfile.py
index aa658b4..0eeae2a 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -112,6 +112,8 @@ def test(session, pandas_version: str):
         "python",
         "-m",
         "pytest",
+        "-ra",
+        "--tb=native",
         "--cov-report=term-missing",
         "--cov=eland/",
         "--cov-config=setup.cfg",
diff --git a/requirements-dev.txt b/requirements-dev.txt
index ce16712..286d054 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -22,7 +22,7 @@ lightgbm>=2,<4
 torch>=1.13.1,<2.0; python_version<'3.11'
 # Versions known to be compatible with PyTorch 1.13.1
 sentence-transformers>=2.1.0,<=2.2.2; python_version<'3.11'
-transformers[torch]>=4.12.0,<=4.27.4; python_version<'3.11'
+transformers[torch]>=4.31.0,<=4.33.2; python_version<'3.11'
 
 #
 # Testing
diff --git a/tests/ml/pytorch/test_pytorch_model_upload_pytest.py b/tests/ml/pytorch/test_pytorch_model_upload_pytest.py
index 36c4086..6d9f679 100644
--- a/tests/ml/pytorch/test_pytorch_model_upload_pytest.py
+++ b/tests/ml/pytorch/test_pytorch_model_upload_pytest.py
@@ -14,6 +14,7 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
+import platform
 import tempfile
 
 import pytest
@@ -82,6 +83,14 @@ def setup_and_tear_down():
     pass
 
 
+@pytest.fixture(scope="session")
+def quantize():
+    # quantization does not work on ARM processors
+    # TODO: It seems that PyTorch 2.0 supports OneDNN for aarch64. We should
+    # revisit this when we upgrade to PyTorch 2.0.
+    return platform.machine() not in ["arm64", "aarch64"]
+
+
 def download_model_and_start_deployment(tmp_dir, quantize, model_id, task):
     print("Loading HuggingFace transformer tokenizer and model")
     tm = TransformerModel(
@@ -103,31 +112,17 @@ def download_model_and_start_deployment(tmp_dir, quantize, model_id, task):
 
 
 class TestPytorchModel:
-    def __init__(self):
-        # quantization does not work on ARM processors
-        # TODO: It seems that PyTorch 2.0 supports OneDNN for aarch64. We should
-        # revisit this when we upgrade to PyTorch 2.0.
-        import platform
-
-        self.quantize = (
-            True if platform.machine() not in ["arm64", "aarch64"] else False
-        )
-
     @pytest.mark.parametrize("model_id,task,text_input,value", TEXT_PREDICTION_MODELS)
-    def test_text_prediction(self, model_id, task, text_input, value):
+    def test_text_prediction(self, model_id, task, text_input, value, quantize):
         with tempfile.TemporaryDirectory() as tmp_dir:
-            ptm = download_model_and_start_deployment(
-                tmp_dir, self.quantize, model_id, task
-            )
-            result = ptm.infer(docs=[{"text_field": text_input}])
-            assert result["predicted_value"] == value
+            ptm = download_model_and_start_deployment(tmp_dir, quantize, model_id, task)
+            results = ptm.infer(docs=[{"text_field": text_input}])
+            assert results.body["inference_results"][0]["predicted_value"] == value
 
     @pytest.mark.parametrize("model_id,task,text_input", TEXT_EMBEDDING_MODELS)
-    def test_text_embedding(self, model_id, task, text_input):
+    def test_text_embedding(self, model_id, task, text_input, quantize):
         with tempfile.TemporaryDirectory() as tmp_dir:
-            ptm = download_model_and_start_deployment(
-                tmp_dir, self.quantize, model_id, task
-            )
+            ptm = download_model_and_start_deployment(tmp_dir, quantize, model_id, task)
             ptm.infer(docs=[{"text_field": text_input}])
 
         if ES_VERSION >= (8, 8, 0):
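
Note on the transformers.py hunk above: `access_token` moves to the end of the keyword-only parameter list and gains a default of `None`, so call sites for public models can simply omit it. A minimal usage sketch follows; it is not part of this patch, and the model ID, task type, and version tuple are illustrative assumptions, not values taken from this PR.

    from eland.ml.pytorch.transformers import TransformerModel

    # Hypothetical call site: all parameters are keyword-only, and with
    # access_token now defaulting to None it can be left out entirely
    # when loading a public Hugging Face model.
    tm = TransformerModel(
        model_id="sentence-transformers/msmarco-MiniLM-L-12-v3",  # assumed model
        task_type="text_embedding",
        es_version=(8, 8, 0),
        quantize=False,
        # access_token="hf_...",  # only needed for private/gated models
    )

Because the parameters are keyword-only (note the `*` in the signature), reordering them is backward compatible for existing callers; only positional use would have broken, and that was never possible here.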