def _compat_field_caps(client, fields, index=None):
    """Call the field_caps API in a way that works across server versions.

    Elasticsearch 8.5.0 moved the 'fields' parameter of the field_caps API
    into the HTTP request body; before that it was only accepted in the
    query string. Without this helper the installed version of the
    Elasticsearch client would matter for compatibility with older servers,
    which is surprising for people who only consume Eland.

    The work-around is to force 'fields' into the query string whenever the
    server is older than 8.5.0.

    Parameters
    ----------
    client
        Elasticsearch client used to issue the request.
    fields
        Value of the 'fields' parameter. On the legacy path it is left out
        of the query string when it is None.
    index
        Optional index (pattern) to scope the request to. On the legacy
        path, a value found in SKIP_IN_PATH (e.g. None) targets
        "/_field_caps" instead of "/<index>/_field_caps".

    Returns
    -------
    Whatever client.field_caps() (server >= 8.5.0) or
    client.perform_request() (older servers) returns.
    """
    # Older servers: bypass client.field_caps() and build the request by
    # hand so that 'fields' is guaranteed to travel in the query string.
    if es_version(client) < (8, 5, 0):
        # Imported lazily so the private client helpers are only touched
        # when the work-around is actually required.
        from elasticsearch._sync.client import SKIP_IN_PATH, _quote

        url = (
            "/_field_caps"
            if index in SKIP_IN_PATH
            else f"/{_quote(index)}/_field_caps"
        )
        params: Dict[str, Any] = {} if fields is None else {"fields": fields}
        return client.perform_request(
            "POST", url, params=params, headers={"accept": "application/json"}
        )

    # 8.5.0 and later: sending through any client version is fine.
    return client.field_caps(index=index, fields=fields)
class TestCompatFieldCaps(TestData):
    """Check which request path _compat_field_caps() takes per server version.

    Each test uses a mock.Mock() client whose '_eland_es_version' attribute
    is preset to the server version under test (presumably the value that
    es_version() reads back -- confirm against eland.common).
    """

    def test_query_for_es_8_4_4(self):
        """Servers older than 8.5.0 must go through raw perform_request()."""
        client = mock.Mock()
        client._eland_es_version = (8, 4, 4)

        # With an index, the index is part of the request path.
        _compat_field_caps(client, fields="*", index="test-index")
        client.perform_request.assert_called_once_with(
            "POST",
            "/test-index/_field_caps",
            params={"fields": "*"},
            headers={"accept": "application/json"},
        )

        # Forget the first call so the next assertion proves that exactly
        # one request is issued per helper invocation.
        client.perform_request.reset_mock()

        # Without an index, the request targets the bare endpoint.
        _compat_field_caps(client, fields="*")
        client.perform_request.assert_called_once_with(
            "POST",
            "/_field_caps",
            params={"fields": "*"},
            headers={"accept": "application/json"},
        )

        # The regular client API must never be used on the legacy path.
        client.field_caps.assert_not_called()

    def test_query_for_es_8_5_0(self):
        """Servers at 8.5.0 or later must go through client.field_caps()."""
        client = mock.Mock()
        client._eland_es_version = (8, 5, 0)

        _compat_field_caps(client, fields="*", index="test-index")
        client.field_caps.assert_called_once_with(fields="*", index="test-index")

        # Forget the first call so the next assertion proves that exactly
        # one API call is made per helper invocation.
        client.field_caps.reset_mock()

        _compat_field_caps(client, fields="*")
        client.field_caps.assert_called_once_with(fields="*", index=None)

        # The query-string work-around must not kick in on newer servers.
        client.perform_request.assert_not_called()