mirror of
https://github.com/elastic/eland.git
synced 2025-07-24 00:00:39 +08:00
Too long frame exception fixes (#135)
* Updating test matrix for 7.6 + removing oss for now. * Resolving 7.6.0 docs issues * Updating ML docs * Resolving too_long_frame_exception on large mappings - Embedded _source parameters in body rather than url - Fixed bug in DataFrame.info on empty DataFrame - Removed warning from TestImportedMLModel * Resolving too_long_frame_exception on large mappings - Embedded _source parameters in body rather than url - Fixed bug in DataFrame.info on empty DataFrame - Removed warning from TestImportedMLModel
This commit is contained in:
parent
206677818f
commit
a33ff45ebc
@ -555,8 +555,13 @@ class DataFrame(NDFrame):
|
||||
# Print index summary e.g.
|
||||
# Index: 103 entries, 0 to 102
|
||||
# Do this by getting head and tail of dataframe
|
||||
head = self.head(1)._to_pandas().index[0]
|
||||
tail = self.tail(1)._to_pandas().index[0]
|
||||
if self.empty:
|
||||
# index[0] is out of bounds for empty df
|
||||
head = self.head(1)._to_pandas()
|
||||
tail = self.tail(1)._to_pandas()
|
||||
else:
|
||||
head = self.head(1)._to_pandas().index[0]
|
||||
tail = self.tail(1)._to_pandas().index[0]
|
||||
index_summary = ', %s to %s' % (pprint_thing(head),
|
||||
pprint_thing(tail))
|
||||
|
||||
|
@ -16,7 +16,7 @@ import warnings
|
||||
from collections import OrderedDict
|
||||
|
||||
import pandas as pd
|
||||
from pandas.core.dtypes.common import is_bool_dtype, is_datetime_or_timedelta_dtype
|
||||
from pandas.core.dtypes.common import is_datetime_or_timedelta_dtype
|
||||
|
||||
from eland import Index, SortOrder, DEFAULT_CSV_BATCH_OUTPUT_SIZE, DEFAULT_ES_MAX_RESULT_WINDOW, \
|
||||
elasticsearch_date_to_pandas_date
|
||||
@ -602,12 +602,22 @@ class Operations:
|
||||
if size is not None and size <= DEFAULT_ES_MAX_RESULT_WINDOW:
|
||||
if size > 0:
|
||||
try:
|
||||
# For query_compiler._client.search we could add _source
|
||||
# as a parameter, or add this value in body.
|
||||
#
|
||||
# If _source is a parameter it is encoded into the url.
|
||||
#
|
||||
# If _source is a large number of fields (1000+) then this can result in an
|
||||
# extremely long url and a `too_long_frame_exception`. Therefore, add
|
||||
# _source to the body rather than as a _source parameter
|
||||
if _source:
|
||||
body['_source'] = _source
|
||||
|
||||
es_results = query_compiler._client.search(
|
||||
index=query_compiler._index_pattern,
|
||||
size=size,
|
||||
sort=sort_params,
|
||||
body=body,
|
||||
_source=_source)
|
||||
body=body)
|
||||
except Exception:
|
||||
# Catch all ES errors and print debug (currently to stdout)
|
||||
error = {
|
||||
|
49
eland/tests/dataframe/test_big_mapping_pytest.py
Normal file
49
eland/tests/dataframe/test_big_mapping_pytest.py
Normal file
@ -0,0 +1,49 @@
|
||||
# Copyright 2020 Elasticsearch BV
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# File called _pytest for PyCharm compatibility
|
||||
|
||||
import eland as ed
|
||||
from eland.tests.common import ES_TEST_CLIENT
|
||||
from eland.tests.common import TestData
|
||||
|
||||
|
||||
class TestDataFrameBigMapping(TestData):
|
||||
|
||||
def test_big_mapping(self):
|
||||
mapping = {'mappings': {'properties': {}}}
|
||||
|
||||
for i in range(0, 1000):
|
||||
field_name = "long_field_name_" + str(i)
|
||||
mapping['mappings']['properties'][field_name] = {'type': 'float'}
|
||||
|
||||
ES_TEST_CLIENT.indices.delete(index='thousand_fields', ignore=[400, 404])
|
||||
ES_TEST_CLIENT.indices.create(index='thousand_fields', body=mapping)
|
||||
|
||||
ed_df = ed.DataFrame(ES_TEST_CLIENT, 'thousand_fields')
|
||||
ed_df.info()
|
||||
|
||||
ES_TEST_CLIENT.indices.delete(index='thousand_fields')
|
@ -15,6 +15,10 @@
|
||||
# File called _pytest for PyCharm compatibility
|
||||
from io import StringIO
|
||||
|
||||
import eland as ed
|
||||
|
||||
from eland.tests import ES_TEST_CLIENT
|
||||
|
||||
from eland.tests.common import TestData
|
||||
|
||||
|
||||
@ -39,3 +43,18 @@ class TestDataFrameInfo(TestData):
|
||||
# NOTE: info does not work on truncated data frames (e.g. head/tail) TODO
|
||||
|
||||
print(self.ed_ecommerce().info())
|
||||
|
||||
def test_empty_info(self):
|
||||
mapping = {'mappings': {'properties': {}}}
|
||||
|
||||
for i in range(0, 10):
|
||||
field_name = "field_name_" + str(i)
|
||||
mapping['mappings']['properties'][field_name] = {'type': 'float'}
|
||||
|
||||
ES_TEST_CLIENT.indices.delete(index='empty_index', ignore=[400, 404])
|
||||
ES_TEST_CLIENT.indices.create(index='empty_index', body=mapping)
|
||||
|
||||
ed_df = ed.DataFrame(ES_TEST_CLIENT, 'empty_index')
|
||||
ed_df.info()
|
||||
|
||||
ES_TEST_CLIENT.indices.delete(index='empty_index')
|
||||
|
@ -120,7 +120,7 @@ class TestImportedMLModel:
|
||||
|
||||
# Get some test results
|
||||
test_data = [[0.1, 0.2, 0.3, -0.5, 1.0], [1.6, 2.1, -10, 50, -1.0]]
|
||||
test_results = classifier.predict(test_data)
|
||||
test_results = classifier.predict(np.asarray(test_data))
|
||||
|
||||
# Serialise the models to Elasticsearch
|
||||
feature_names = ["f0", "f1", "f2", "f3", "f4"]
|
||||
@ -142,7 +142,7 @@ class TestImportedMLModel:
|
||||
|
||||
# Get some test results
|
||||
test_data = [[0.1, 0.2, 0.3, -0.5, 1.0], [1.6, 2.1, -10, 50, -1.0]]
|
||||
test_results = regressor.predict(test_data)
|
||||
test_results = regressor.predict(np.asarray(test_data))
|
||||
|
||||
# Serialise the models to Elasticsearch
|
||||
feature_names = ["f0", "f1", "f2", "f3", "f4"]
|
||||
|
Loading…
x
Reference in New Issue
Block a user