From d50436b01c51d4b4b0b9c97c03be2fa9a1fd04ef Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 12 Feb 2025 22:23:00 +0400
Subject: [PATCH] Upgrade transformers to 4.47 (#752) (#759)

---
 eland/ml/pytorch/transformers.py | 4 ++--
 setup.py                         | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/eland/ml/pytorch/transformers.py b/eland/ml/pytorch/transformers.py
index fb1bb69..40e5650 100644
--- a/eland/ml/pytorch/transformers.py
+++ b/eland/ml/pytorch/transformers.py
@@ -570,7 +570,7 @@ class _TraceableTextEmbeddingModel(_TransformerTraceableModel):
     def _prepare_inputs(self) -> transformers.BatchEncoding:
         return self._tokenizer(
             "This is an example sentence.",
-            padding="max_length",
+            padding="longest",
             return_tensors="pt",
         )

@@ -759,7 +759,7 @@ class TransformerModel:
         # a random or very large value.
         REASONABLE_MAX_LENGTH = 8192
         max_len = getattr(self._tokenizer, "model_max_length", None)
-        if max_len is not None and max_len < REASONABLE_MAX_LENGTH:
+        if max_len is not None and max_len <= REASONABLE_MAX_LENGTH:
             return int(max_len)

         max_sizes = getattr(self._tokenizer, "max_model_input_sizes", dict())
diff --git a/setup.py b/setup.py
index 2ad02ff..4ac65d1 100644
--- a/setup.py
+++ b/setup.py
@@ -65,7 +65,7 @@ extras = {
         "sentence-transformers>=2.1.0,<=2.7.0",
         # sentencepiece is a required dependency for the slow tokenizers
         # https://huggingface.co/transformers/v4.4.2/migration.html#sentencepiece-is-removed-from-the-required-dependencies
-        "transformers[sentencepiece]>=4.31.0,<4.44.0",
+        "transformers[sentencepiece]>=4.47.0",
     ],
 }
 extras["all"] = list({dep for deps in extras.values() for dep in deps})
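
A minimal sketch (not part of the patch) of what the padding change in the first hunk means for the traced example input. With padding="max_length" the tokenizer pads the single example sentence out to the tokenizer's full model_max_length; with padding="longest" it pads only to the longest sequence in the batch, so the traced input stays short. The model name below is an arbitrary illustration, not one mandated by eland.

    # Sketch only: padding="max_length" vs padding="longest".
    # "bert-base-uncased" is an example model, not prescribed by the patch.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

    before = tokenizer(
        "This is an example sentence.",
        padding="max_length",  # pads to tokenizer.model_max_length (512 here)
        return_tensors="pt",
    )
    after = tokenizer(
        "This is an example sentence.",
        padding="longest",  # pads only to the longest sequence in the batch
        return_tensors="pt",
    )

    print(before["input_ids"].shape)  # torch.Size([1, 512])
    print(after["input_ids"].shape)   # torch.Size([1, 8]): just the sentence's own tokens

The "<" to "<=" change in the second hunk is a boundary fix: a tokenizer reporting a model_max_length of exactly 8192 now passes the sanity check and is returned directly, rather than falling through to the max_model_input_sizes lookup.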