Fix failed import of ST RoBERTa models (#637)

Fixes an error when uploading the sentence-transformers/all-distilroberta-v1 model, which failed with "missing 2 required positional arguments: 'token_type_ids' and 'position_ids'". The cause was that the tokenizer type was not recognised due to a typo: the tuple of tokenizer types listed transformers.RobertaConfig where transformers.RobertaTokenizer was intended.
2025-07-11 00:02:14 +08:00 · 2023-11-21 12:53:43 +00:00 · 2023-11-21 12:53:43 +00:00 · 081250cdec
commit 081250cdec
parent af26897313
2 changed files with 9 additions and 1 deletions
--- a/eland/ml/pytorch/transformers.py
+++ b/eland/ml/pytorch/transformers.py
@@ -311,7 +311,7 @@ class _SentenceTransformerWrapperModule(nn.Module):  # type: ignore
            (
                transformers.BartTokenizer,
                transformers.MPNetTokenizer,
-                transformers.RobertaConfig,
+                transformers.RobertaTokenizer,
                transformers.XLMRobertaTokenizer,
            ),
        ):
--- a/tests/ml/pytorch/test_pytorch_model_config_pytest.py
+++ b/tests/ml/pytorch/test_pytorch_model_config_pytest.py
@@ -77,6 +77,14 @@ pytestmark = [
 # have been imported
 if HAS_PYTORCH and HAS_SKLEARN and HAS_TRANSFORMERS:
    MODEL_CONFIGURATIONS = [
        (
            "sentence-transformers/all-distilroberta-v1",
            "text_embedding",
            TextEmbeddingInferenceOptions,
            NlpRobertaTokenizationConfig,
            512,
            768,
        ),
        (
            "intfloat/multilingual-e5-small",
            "text_embedding",