Update to latest ES versions and fix unit tests (#512)

Update the test matrix to the latest Elasticsearch versions and fix the unit tests that were failing in CI.
This commit is contained in:
Valeriy Khakhutskyy 2023-01-31 20:55:29 +01:00 committed by GitHub
parent c55516f376
commit 2ea96322b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 101 additions and 93 deletions

View File

@ -12,7 +12,7 @@ set -euxo pipefail
TEST_SUITE=${TEST_SUITE-xpack}
NODE_NAME=localhost
PANDAS_VERSION=${PANDAS_VERSION-1.3.0}
PANDAS_VERSION=${PANDAS_VERSION-1.5.0}
elasticsearch_image=elasticsearch

View File

@ -1,18 +1,16 @@
---
ELASTICSEARCH_VERSION:
- '8.1.0-SNAPSHOT'
- '8.0.0-SNAPSHOT'
- '8.6.0-SNAPSHOT'
- '8.7.0-SNAPSHOT'
PANDAS_VERSION:
- '1.2.0'
- '1.3.0'
- '1.5.0'
PYTHON_VERSION:
- '3.10'
- '3.9'
- '3.8'
- '3.7'
TEST_SUITE:
- xpack

View File

@ -49,7 +49,7 @@ $ conda install -c conda-forge eland
### Compatibility
- Supports Python 3.7+ and Pandas 1.3
- Supports Python 3.8+ and Pandas 1.5
- Supports Elasticsearch clusters that are 7.11+; 8.3 or later is recommended for all features to work.
If you are using the NLP with PyTorch feature, make sure your Eland minor version matches the minor
version of your Elasticsearch cluster. For all other features it is sufficient for the major versions

View File

@ -1,10 +1,11 @@
elasticsearch>=7.7
pandas>=1.2.0
matplotlib
pandas>=1.5
matplotlib>=3.6
nbval
scikit-learn>=0.22.1
xgboost>=1
lightgbm
sphinx==5.3.0
nbsphinx
git+https://github.com/pandas-dev/pydata-sphinx-theme.git

View File

@ -172,7 +172,7 @@ class NDFrame(ABC):
head = self.head(head_rows).to_pandas()
tail = self.tail(tail_rows).to_pandas()
return head.append(tail)
return pd.concat([head, tail])
def __sizeof__(self) -> int:
# Don't default to pandas, just return approximation TODO - make this more accurate

View File

@ -714,7 +714,7 @@ class Series(NDFrame):
>>> ed_ecommerce = ed.DataFrame('http://localhost:9200', 'ecommerce')
>>> ed_ecommerce["day_of_week"].mode()
0 Thursday
dtype: object
Name: day_of_week, dtype: object
>>> ed_ecommerce["order_date"].mode()
0 2016-12-02 20:36:58
@ -727,16 +727,18 @@ class Series(NDFrame):
7 2016-12-15 11:38:24
8 2016-12-22 19:39:22
9 2016-12-24 06:21:36
dtype: datetime64[ns]
Name: order_date, dtype: datetime64[ns]
>>> ed_ecommerce["order_date"].mode(es_size=3)
0 2016-12-02 20:36:58
1 2016-12-04 23:44:10
2 2016-12-08 06:21:36
dtype: datetime64[ns]
Name: order_date, dtype: datetime64[ns]
"""
return self._query_compiler.mode(is_dataframe=False, es_size=es_size)
result = self._query_compiler.mode(is_dataframe=False, es_size=es_size)
result.name = self.name
return result
def es_match(
self,

View File

@ -100,8 +100,8 @@ def lint(session):
session.error("\n" + "\n".join(sorted(set(errors))))
@nox.session(python=["3.7", "3.8", "3.9", "3.10"])
@nox.parametrize("pandas_version", ["1.2.0", "1.3.0"])
@nox.session(python=["3.8", "3.9", "3.10"])
@nox.parametrize("pandas_version", ["1.5.0"])
def test(session, pandas_version: str):
session.install("-r", "requirements-dev.txt")
session.install(".")

View File

@ -2,8 +2,8 @@
# Basic requirements
#
elasticsearch>=8.3,<9
pandas>=1.2,<2
matplotlib<4
pandas>=1.5
matplotlib>=3.6
numpy<2
tqdm<5
@ -12,7 +12,6 @@ tqdm<5
#
scikit-learn>=0.22.1,<2
xgboost>=0.90,<2
scikit-learn>=0.22.1,<2
lightgbm>=2,<4
# PyTorch doesn't support Python 3.10 yet (pytorch/pytorch#66424)

View File

@ -2,6 +2,6 @@
# Basic requirements
#
elasticsearch>=8.3,<9
pandas>=1.2,<2
matplotlib<4
pandas>=1.5
matplotlib>=3.6
numpy<2

View File

@ -83,12 +83,12 @@ setup(
packages=find_packages(include=["eland", "eland.*"]),
install_requires=[
"elasticsearch>=8.3,<9",
"pandas>=1.2,<2",
"matplotlib<4",
"pandas>=1.5",
"matplotlib>=3.6",
"numpy<2",
],
scripts=["bin/eland_import_hub_model"],
python_requires=">=3.7",
python_requires=">=3.8",
package_data={"eland": ["py.typed"]},
include_package_data=True,
zip_safe=False,

View File

@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
import gzip
import json
import os
from datetime import timedelta
@ -30,15 +32,21 @@ from tests import (
ECOMMERCE_DF_FILE_NAME,
ECOMMERCE_INDEX_NAME,
ES_TEST_CLIENT,
FLIGHTS_DF_FILE_NAME,
FLIGHTS_FILE_NAME,
FLIGHTS_INDEX_NAME,
FLIGHTS_SMALL_INDEX_NAME,
)
_pd_flights = pd.read_json(FLIGHTS_DF_FILE_NAME).sort_index()
_ed_flights = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME)
flight_records = []
with gzip.open(FLIGHTS_FILE_NAME) as f:
for json_obj in f:
flight_records.append(json.loads(json_obj))
_pd_flights = pd.DataFrame.from_records(flight_records).reindex(
_ed_flights.columns, axis=1
)
_pd_flights["timestamp"] = pd.to_datetime(_pd_flights["timestamp"])
_pd_flights.index = _pd_flights.index.map(str) # make index 'object' not int
_ed_flights = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME)
_pd_flights_small = _pd_flights.head(48)
_ed_flights_small = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_SMALL_INDEX_NAME)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long