mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Default truncation to second
for the text similarity task type (#713)
In reranking, the first input (the query) is generally shorter than the second. In this case it makes more sense to truncate the second input (the document text).
This commit is contained in:
parent
bee6d0e1f7
commit
fd8886da6a
@ -770,6 +770,9 @@ class TransformerModel:
|
||||
tokenization_config.span = 128
|
||||
tokenization_config.truncate = "none"
|
||||
|
||||
if self._task_type == "text_similarity":
|
||||
tokenization_config.truncate = "second"
|
||||
|
||||
if self._traceable_model.classification_labels():
|
||||
inference_config = TASK_TYPE_TO_INFERENCE_CONFIG[self._task_type](
|
||||
tokenization=tokenization_config,
|
||||
|
@ -217,6 +217,9 @@ class TestModelConfguration:
|
||||
assert isinstance(config.inference_config.classification_labels, list)
|
||||
assert len(config.inference_config.classification_labels) > 0
|
||||
|
||||
if task_type == "text_similarity":
|
||||
assert tokenization.truncate == "second"
|
||||
|
||||
del tm
|
||||
|
||||
def test_model_config_with_prefix_string(self):
|
||||
|
Loading…
x
Reference in New Issue
Block a user