Update to latest ES versions and fix unit tests (#512)

Update the test matrix to the latest Elasticsearch versions and fix the broken unit tests on the CI.
This commit is contained in:
Valeriy Khakhutskyy 2023-01-31 20:55:29 +01:00 committed by GitHub
parent c55516f376
commit 2ea96322b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 101 additions and 93 deletions

View File

@@ -12,7 +12,7 @@ set -euxo pipefail
TEST_SUITE=${TEST_SUITE-xpack} TEST_SUITE=${TEST_SUITE-xpack}
NODE_NAME=localhost NODE_NAME=localhost
PANDAS_VERSION=${PANDAS_VERSION-1.3.0} PANDAS_VERSION=${PANDAS_VERSION-1.5.0}
elasticsearch_image=elasticsearch elasticsearch_image=elasticsearch

View File

@@ -1,18 +1,16 @@
--- ---
ELASTICSEARCH_VERSION: ELASTICSEARCH_VERSION:
- '8.1.0-SNAPSHOT' - '8.6.0-SNAPSHOT'
- '8.0.0-SNAPSHOT' - '8.7.0-SNAPSHOT'
PANDAS_VERSION: PANDAS_VERSION:
- '1.2.0' - '1.5.0'
- '1.3.0'
PYTHON_VERSION: PYTHON_VERSION:
- '3.10' - '3.10'
- '3.9' - '3.9'
- '3.8' - '3.8'
- '3.7'
TEST_SUITE: TEST_SUITE:
- xpack - xpack

View File

@@ -49,7 +49,7 @@ $ conda install -c conda-forge eland
### Compatibility ### Compatibility
- Supports Python 3.7+ and Pandas 1.3 - Supports Python 3.8+ and Pandas 1.5
- Supports Elasticsearch clusters that are 7.11+, recommended 8.3 or later for all features to work. - Supports Elasticsearch clusters that are 7.11+, recommended 8.3 or later for all features to work.
If you are using the NLP with PyTorch feature make sure your Eland minor version matches the minor If you are using the NLP with PyTorch feature make sure your Eland minor version matches the minor
version of your Elasticsearch cluster. For all other features it is sufficient for the major versions version of your Elasticsearch cluster. For all other features it is sufficient for the major versions

View File

@@ -1,10 +1,11 @@
elasticsearch>=7.7 elasticsearch>=7.7
pandas>=1.2.0 pandas>=1.5
matplotlib matplotlib>=3.6
nbval nbval
scikit-learn>=0.22.1 scikit-learn>=0.22.1
xgboost>=1 xgboost>=1
lightgbm lightgbm
sphinx==5.3.0
nbsphinx nbsphinx
git+https://github.com/pandas-dev/pydata-sphinx-theme.git git+https://github.com/pandas-dev/pydata-sphinx-theme.git

View File

@@ -172,7 +172,7 @@ class NDFrame(ABC):
head = self.head(head_rows).to_pandas() head = self.head(head_rows).to_pandas()
tail = self.tail(tail_rows).to_pandas() tail = self.tail(tail_rows).to_pandas()
return head.append(tail) return pd.concat([head, tail])
def __sizeof__(self) -> int: def __sizeof__(self) -> int:
# Don't default to pandas, just return approximation TODO - make this more accurate # Don't default to pandas, just return approximation TODO - make this more accurate

View File

@@ -714,7 +714,7 @@ class Series(NDFrame):
>>> ed_ecommerce = ed.DataFrame('http://localhost:9200', 'ecommerce') >>> ed_ecommerce = ed.DataFrame('http://localhost:9200', 'ecommerce')
>>> ed_ecommerce["day_of_week"].mode() >>> ed_ecommerce["day_of_week"].mode()
0 Thursday 0 Thursday
dtype: object Name: day_of_week, dtype: object
>>> ed_ecommerce["order_date"].mode() >>> ed_ecommerce["order_date"].mode()
0 2016-12-02 20:36:58 0 2016-12-02 20:36:58
@@ -727,16 +727,18 @@ class Series(NDFrame):
7 2016-12-15 11:38:24 7 2016-12-15 11:38:24
8 2016-12-22 19:39:22 8 2016-12-22 19:39:22
9 2016-12-24 06:21:36 9 2016-12-24 06:21:36
dtype: datetime64[ns] Name: order_date, dtype: datetime64[ns]
>>> ed_ecommerce["order_date"].mode(es_size=3) >>> ed_ecommerce["order_date"].mode(es_size=3)
0 2016-12-02 20:36:58 0 2016-12-02 20:36:58
1 2016-12-04 23:44:10 1 2016-12-04 23:44:10
2 2016-12-08 06:21:36 2 2016-12-08 06:21:36
dtype: datetime64[ns] Name: order_date, dtype: datetime64[ns]
""" """
return self._query_compiler.mode(is_dataframe=False, es_size=es_size) result = self._query_compiler.mode(is_dataframe=False, es_size=es_size)
result.name = self.name
return result
def es_match( def es_match(
self, self,

View File

@@ -100,8 +100,8 @@ def lint(session):
session.error("\n" + "\n".join(sorted(set(errors)))) session.error("\n" + "\n".join(sorted(set(errors))))
@nox.session(python=["3.7", "3.8", "3.9", "3.10"]) @nox.session(python=["3.8", "3.9", "3.10"])
@nox.parametrize("pandas_version", ["1.2.0", "1.3.0"]) @nox.parametrize("pandas_version", ["1.5.0"])
def test(session, pandas_version: str): def test(session, pandas_version: str):
session.install("-r", "requirements-dev.txt") session.install("-r", "requirements-dev.txt")
session.install(".") session.install(".")

View File

@@ -2,8 +2,8 @@
# Basic requirements # Basic requirements
# #
elasticsearch>=8.3,<9 elasticsearch>=8.3,<9
pandas>=1.2,<2 pandas>=1.5
matplotlib<4 matplotlib>=3.6
numpy<2 numpy<2
tqdm<5 tqdm<5
@@ -12,7 +12,6 @@ tqdm<5
# #
scikit-learn>=0.22.1,<2 scikit-learn>=0.22.1,<2
xgboost>=0.90,<2 xgboost>=0.90,<2
scikit-learn>=0.22.1,<2
lightgbm>=2,<4 lightgbm>=2,<4
# PyTorch doesn't support Python 3.10 yet (pytorch/pytorch#66424) # PyTorch doesn't support Python 3.10 yet (pytorch/pytorch#66424)

View File

@@ -2,6 +2,6 @@
# Basic requirements # Basic requirements
# #
elasticsearch>=8.3,<9 elasticsearch>=8.3,<9
pandas>=1.2,<2 pandas>=1.5
matplotlib<4 matplotlib>=3.6
numpy<2 numpy<2

View File

@@ -83,12 +83,12 @@ setup(
packages=find_packages(include=["eland", "eland.*"]), packages=find_packages(include=["eland", "eland.*"]),
install_requires=[ install_requires=[
"elasticsearch>=8.3,<9", "elasticsearch>=8.3,<9",
"pandas>=1.2,<2", "pandas>=1.5",
"matplotlib<4", "matplotlib>=3.6",
"numpy<2", "numpy<2",
], ],
scripts=["bin/eland_import_hub_model"], scripts=["bin/eland_import_hub_model"],
python_requires=">=3.7", python_requires=">=3.8",
package_data={"eland": ["py.typed"]}, package_data={"eland": ["py.typed"]},
include_package_data=True, include_package_data=True,
zip_safe=False, zip_safe=False,

View File

@@ -15,6 +15,8 @@
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
import gzip
import json
import os import os
from datetime import timedelta from datetime import timedelta
@@ -30,15 +32,21 @@ from tests import (
ECOMMERCE_DF_FILE_NAME, ECOMMERCE_DF_FILE_NAME,
ECOMMERCE_INDEX_NAME, ECOMMERCE_INDEX_NAME,
ES_TEST_CLIENT, ES_TEST_CLIENT,
FLIGHTS_DF_FILE_NAME, FLIGHTS_FILE_NAME,
FLIGHTS_INDEX_NAME, FLIGHTS_INDEX_NAME,
FLIGHTS_SMALL_INDEX_NAME, FLIGHTS_SMALL_INDEX_NAME,
) )
_pd_flights = pd.read_json(FLIGHTS_DF_FILE_NAME).sort_index() _ed_flights = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME)
flight_records = []
with gzip.open(FLIGHTS_FILE_NAME) as f:
for json_obj in f:
flight_records.append(json.loads(json_obj))
_pd_flights = pd.DataFrame.from_records(flight_records).reindex(
_ed_flights.columns, axis=1
)
_pd_flights["timestamp"] = pd.to_datetime(_pd_flights["timestamp"]) _pd_flights["timestamp"] = pd.to_datetime(_pd_flights["timestamp"])
_pd_flights.index = _pd_flights.index.map(str) # make index 'object' not int _pd_flights.index = _pd_flights.index.map(str) # make index 'object' not int
_ed_flights = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME)
_pd_flights_small = _pd_flights.head(48) _pd_flights_small = _pd_flights.head(48)
_ed_flights_small = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_SMALL_INDEX_NAME) _ed_flights_small = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_SMALL_INDEX_NAME)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long