mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Update to latest ES versions and fix unit tests (#512)
Update the test matrix to the latest Elasticsearch versions and fix the broken unit tests on the CI.
This commit is contained in:
parent
c55516f376
commit
2ea96322b3
@ -12,7 +12,7 @@ set -euxo pipefail
|
||||
|
||||
TEST_SUITE=${TEST_SUITE-xpack}
|
||||
NODE_NAME=localhost
|
||||
PANDAS_VERSION=${PANDAS_VERSION-1.3.0}
|
||||
PANDAS_VERSION=${PANDAS_VERSION-1.5.0}
|
||||
|
||||
|
||||
elasticsearch_image=elasticsearch
|
||||
|
@ -1,18 +1,16 @@
|
||||
---
|
||||
|
||||
ELASTICSEARCH_VERSION:
|
||||
- '8.1.0-SNAPSHOT'
|
||||
- '8.0.0-SNAPSHOT'
|
||||
- '8.6.0-SNAPSHOT'
|
||||
- '8.7.0-SNAPSHOT'
|
||||
|
||||
PANDAS_VERSION:
|
||||
- '1.2.0'
|
||||
- '1.3.0'
|
||||
- '1.5.0'
|
||||
|
||||
PYTHON_VERSION:
|
||||
- '3.10'
|
||||
- '3.9'
|
||||
- '3.8'
|
||||
- '3.7'
|
||||
|
||||
TEST_SUITE:
|
||||
- xpack
|
||||
|
@ -49,7 +49,7 @@ $ conda install -c conda-forge eland
|
||||
|
||||
### Compatibility
|
||||
|
||||
- Supports Python 3.7+ and Pandas 1.3
|
||||
- Supports Python 3.8+ and Pandas 1.5
|
||||
- Supports Elasticsearch clusters that are 7.11+; 8.3 or later is recommended for all features to work.
|
||||
If you are using the NLP with PyTorch feature, make sure your Eland minor version matches the minor
|
||||
version of your Elasticsearch cluster. For all other features it is sufficient for the major versions
|
||||
|
@ -1,10 +1,11 @@
|
||||
elasticsearch>=7.7
|
||||
pandas>=1.2.0
|
||||
matplotlib
|
||||
pandas>=1.5
|
||||
matplotlib>=3.6
|
||||
nbval
|
||||
scikit-learn>=0.22.1
|
||||
xgboost>=1
|
||||
lightgbm
|
||||
sphinx==5.3.0
|
||||
nbsphinx
|
||||
git+https://github.com/pandas-dev/pydata-sphinx-theme.git
|
||||
|
||||
|
@ -172,7 +172,7 @@ class NDFrame(ABC):
|
||||
head = self.head(head_rows).to_pandas()
|
||||
tail = self.tail(tail_rows).to_pandas()
|
||||
|
||||
return head.append(tail)
|
||||
return pd.concat([head, tail])
|
||||
|
||||
def __sizeof__(self) -> int:
|
||||
# Don't default to pandas, just return approximation TODO - make this more accurate
|
||||
|
@ -714,7 +714,7 @@ class Series(NDFrame):
|
||||
>>> ed_ecommerce = ed.DataFrame('http://localhost:9200', 'ecommerce')
|
||||
>>> ed_ecommerce["day_of_week"].mode()
|
||||
0 Thursday
|
||||
dtype: object
|
||||
Name: day_of_week, dtype: object
|
||||
|
||||
>>> ed_ecommerce["order_date"].mode()
|
||||
0 2016-12-02 20:36:58
|
||||
@ -727,16 +727,18 @@ class Series(NDFrame):
|
||||
7 2016-12-15 11:38:24
|
||||
8 2016-12-22 19:39:22
|
||||
9 2016-12-24 06:21:36
|
||||
dtype: datetime64[ns]
|
||||
Name: order_date, dtype: datetime64[ns]
|
||||
|
||||
>>> ed_ecommerce["order_date"].mode(es_size=3)
|
||||
0 2016-12-02 20:36:58
|
||||
1 2016-12-04 23:44:10
|
||||
2 2016-12-08 06:21:36
|
||||
dtype: datetime64[ns]
|
||||
Name: order_date, dtype: datetime64[ns]
|
||||
|
||||
"""
|
||||
return self._query_compiler.mode(is_dataframe=False, es_size=es_size)
|
||||
result = self._query_compiler.mode(is_dataframe=False, es_size=es_size)
|
||||
result.name = self.name
|
||||
return result
|
||||
|
||||
def es_match(
|
||||
self,
|
||||
|
@ -100,8 +100,8 @@ def lint(session):
|
||||
session.error("\n" + "\n".join(sorted(set(errors))))
|
||||
|
||||
|
||||
@nox.session(python=["3.7", "3.8", "3.9", "3.10"])
|
||||
@nox.parametrize("pandas_version", ["1.2.0", "1.3.0"])
|
||||
@nox.session(python=["3.8", "3.9", "3.10"])
|
||||
@nox.parametrize("pandas_version", ["1.5.0"])
|
||||
def test(session, pandas_version: str):
|
||||
session.install("-r", "requirements-dev.txt")
|
||||
session.install(".")
|
||||
|
@ -2,8 +2,8 @@
|
||||
# Basic requirements
|
||||
#
|
||||
elasticsearch>=8.3,<9
|
||||
pandas>=1.2,<2
|
||||
matplotlib<4
|
||||
pandas>=1.5
|
||||
matplotlib>=3.6
|
||||
numpy<2
|
||||
tqdm<5
|
||||
|
||||
@ -12,7 +12,6 @@ tqdm<5
|
||||
#
|
||||
scikit-learn>=0.22.1,<2
|
||||
xgboost>=0.90,<2
|
||||
scikit-learn>=0.22.1,<2
|
||||
lightgbm>=2,<4
|
||||
|
||||
# PyTorch doesn't support Python 3.10 yet (pytorch/pytorch#66424)
|
||||
|
@ -2,6 +2,6 @@
|
||||
# Basic requirements
|
||||
#
|
||||
elasticsearch>=8.3,<9
|
||||
pandas>=1.2,<2
|
||||
matplotlib<4
|
||||
pandas>=1.5
|
||||
matplotlib>=3.6
|
||||
numpy<2
|
||||
|
6
setup.py
6
setup.py
@ -83,12 +83,12 @@ setup(
|
||||
packages=find_packages(include=["eland", "eland.*"]),
|
||||
install_requires=[
|
||||
"elasticsearch>=8.3,<9",
|
||||
"pandas>=1.2,<2",
|
||||
"matplotlib<4",
|
||||
"pandas>=1.5",
|
||||
"matplotlib>=3.6",
|
||||
"numpy<2",
|
||||
],
|
||||
scripts=["bin/eland_import_hub_model"],
|
||||
python_requires=">=3.7",
|
||||
python_requires=">=3.8",
|
||||
package_data={"eland": ["py.typed"]},
|
||||
include_package_data=True,
|
||||
zip_safe=False,
|
||||
|
@ -15,6 +15,8 @@
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import gzip
|
||||
import json
|
||||
import os
|
||||
from datetime import timedelta
|
||||
|
||||
@ -30,15 +32,21 @@ from tests import (
|
||||
ECOMMERCE_DF_FILE_NAME,
|
||||
ECOMMERCE_INDEX_NAME,
|
||||
ES_TEST_CLIENT,
|
||||
FLIGHTS_DF_FILE_NAME,
|
||||
FLIGHTS_FILE_NAME,
|
||||
FLIGHTS_INDEX_NAME,
|
||||
FLIGHTS_SMALL_INDEX_NAME,
|
||||
)
|
||||
|
||||
_pd_flights = pd.read_json(FLIGHTS_DF_FILE_NAME).sort_index()
|
||||
_ed_flights = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME)
|
||||
flight_records = []
|
||||
with gzip.open(FLIGHTS_FILE_NAME) as f:
|
||||
for json_obj in f:
|
||||
flight_records.append(json.loads(json_obj))
|
||||
_pd_flights = pd.DataFrame.from_records(flight_records).reindex(
|
||||
_ed_flights.columns, axis=1
|
||||
)
|
||||
_pd_flights["timestamp"] = pd.to_datetime(_pd_flights["timestamp"])
|
||||
_pd_flights.index = _pd_flights.index.map(str) # make index 'object' not int
|
||||
_ed_flights = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME)
|
||||
|
||||
_pd_flights_small = _pd_flights.head(48)
|
||||
_ed_flights_small = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_SMALL_INDEX_NAME)
|
||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user