mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Update to latest ES versions and fix unit tests (#512)
Update the test matrix to the latest Elasticsearch versions and fix the broken unit tests on the CI.
This commit is contained in:
parent
c55516f376
commit
2ea96322b3
@ -12,7 +12,7 @@ set -euxo pipefail
|
|||||||
|
|
||||||
TEST_SUITE=${TEST_SUITE-xpack}
|
TEST_SUITE=${TEST_SUITE-xpack}
|
||||||
NODE_NAME=localhost
|
NODE_NAME=localhost
|
||||||
PANDAS_VERSION=${PANDAS_VERSION-1.3.0}
|
PANDAS_VERSION=${PANDAS_VERSION-1.5.0}
|
||||||
|
|
||||||
|
|
||||||
elasticsearch_image=elasticsearch
|
elasticsearch_image=elasticsearch
|
||||||
|
@ -1,18 +1,16 @@
|
|||||||
---
|
---
|
||||||
|
|
||||||
ELASTICSEARCH_VERSION:
|
ELASTICSEARCH_VERSION:
|
||||||
- '8.1.0-SNAPSHOT'
|
- '8.6.0-SNAPSHOT'
|
||||||
- '8.0.0-SNAPSHOT'
|
- '8.7.0-SNAPSHOT'
|
||||||
|
|
||||||
PANDAS_VERSION:
|
PANDAS_VERSION:
|
||||||
- '1.2.0'
|
- '1.5.0'
|
||||||
- '1.3.0'
|
|
||||||
|
|
||||||
PYTHON_VERSION:
|
PYTHON_VERSION:
|
||||||
- '3.10'
|
- '3.10'
|
||||||
- '3.9'
|
- '3.9'
|
||||||
- '3.8'
|
- '3.8'
|
||||||
- '3.7'
|
|
||||||
|
|
||||||
TEST_SUITE:
|
TEST_SUITE:
|
||||||
- xpack
|
- xpack
|
||||||
|
@ -49,7 +49,7 @@ $ conda install -c conda-forge eland
|
|||||||
|
|
||||||
### Compatibility
|
### Compatibility
|
||||||
|
|
||||||
- Supports Python 3.7+ and Pandas 1.3
|
- Supports Python 3.8+ and Pandas 1.5
|
||||||
- Supports Elasticsearch clusters that are 7.11+, recommended 8.3 or later for all features to work.
|
- Supports Elasticsearch clusters that are 7.11+, recommended 8.3 or later for all features to work.
|
||||||
If you are using the NLP with PyTorch feature make sure your Eland minor version matches the minor
|
If you are using the NLP with PyTorch feature make sure your Eland minor version matches the minor
|
||||||
version of your Elasticsearch cluster. For all other features it is sufficient for the major versions
|
version of your Elasticsearch cluster. For all other features it is sufficient for the major versions
|
||||||
|
@ -1,10 +1,11 @@
|
|||||||
elasticsearch>=7.7
|
elasticsearch>=7.7
|
||||||
pandas>=1.2.0
|
pandas>=1.5
|
||||||
matplotlib
|
matplotlib>=3.6
|
||||||
nbval
|
nbval
|
||||||
scikit-learn>=0.22.1
|
scikit-learn>=0.22.1
|
||||||
xgboost>=1
|
xgboost>=1
|
||||||
lightgbm
|
lightgbm
|
||||||
|
sphinx==5.3.0
|
||||||
nbsphinx
|
nbsphinx
|
||||||
git+https://github.com/pandas-dev/pydata-sphinx-theme.git
|
git+https://github.com/pandas-dev/pydata-sphinx-theme.git
|
||||||
|
|
||||||
|
@ -172,7 +172,7 @@ class NDFrame(ABC):
|
|||||||
head = self.head(head_rows).to_pandas()
|
head = self.head(head_rows).to_pandas()
|
||||||
tail = self.tail(tail_rows).to_pandas()
|
tail = self.tail(tail_rows).to_pandas()
|
||||||
|
|
||||||
return head.append(tail)
|
return pd.concat([head, tail])
|
||||||
|
|
||||||
def __sizeof__(self) -> int:
|
def __sizeof__(self) -> int:
|
||||||
# Don't default to pandas, just return approximation TODO - make this more accurate
|
# Don't default to pandas, just return approximation TODO - make this more accurate
|
||||||
|
@ -714,7 +714,7 @@ class Series(NDFrame):
|
|||||||
>>> ed_ecommerce = ed.DataFrame('http://localhost:9200', 'ecommerce')
|
>>> ed_ecommerce = ed.DataFrame('http://localhost:9200', 'ecommerce')
|
||||||
>>> ed_ecommerce["day_of_week"].mode()
|
>>> ed_ecommerce["day_of_week"].mode()
|
||||||
0 Thursday
|
0 Thursday
|
||||||
dtype: object
|
Name: day_of_week, dtype: object
|
||||||
|
|
||||||
>>> ed_ecommerce["order_date"].mode()
|
>>> ed_ecommerce["order_date"].mode()
|
||||||
0 2016-12-02 20:36:58
|
0 2016-12-02 20:36:58
|
||||||
@ -727,16 +727,18 @@ class Series(NDFrame):
|
|||||||
7 2016-12-15 11:38:24
|
7 2016-12-15 11:38:24
|
||||||
8 2016-12-22 19:39:22
|
8 2016-12-22 19:39:22
|
||||||
9 2016-12-24 06:21:36
|
9 2016-12-24 06:21:36
|
||||||
dtype: datetime64[ns]
|
Name: order_date, dtype: datetime64[ns]
|
||||||
|
|
||||||
>>> ed_ecommerce["order_date"].mode(es_size=3)
|
>>> ed_ecommerce["order_date"].mode(es_size=3)
|
||||||
0 2016-12-02 20:36:58
|
0 2016-12-02 20:36:58
|
||||||
1 2016-12-04 23:44:10
|
1 2016-12-04 23:44:10
|
||||||
2 2016-12-08 06:21:36
|
2 2016-12-08 06:21:36
|
||||||
dtype: datetime64[ns]
|
Name: order_date, dtype: datetime64[ns]
|
||||||
|
|
||||||
"""
|
"""
|
||||||
return self._query_compiler.mode(is_dataframe=False, es_size=es_size)
|
result = self._query_compiler.mode(is_dataframe=False, es_size=es_size)
|
||||||
|
result.name = self.name
|
||||||
|
return result
|
||||||
|
|
||||||
def es_match(
|
def es_match(
|
||||||
self,
|
self,
|
||||||
|
@ -100,8 +100,8 @@ def lint(session):
|
|||||||
session.error("\n" + "\n".join(sorted(set(errors))))
|
session.error("\n" + "\n".join(sorted(set(errors))))
|
||||||
|
|
||||||
|
|
||||||
@nox.session(python=["3.7", "3.8", "3.9", "3.10"])
|
@nox.session(python=["3.8", "3.9", "3.10"])
|
||||||
@nox.parametrize("pandas_version", ["1.2.0", "1.3.0"])
|
@nox.parametrize("pandas_version", ["1.5.0"])
|
||||||
def test(session, pandas_version: str):
|
def test(session, pandas_version: str):
|
||||||
session.install("-r", "requirements-dev.txt")
|
session.install("-r", "requirements-dev.txt")
|
||||||
session.install(".")
|
session.install(".")
|
||||||
|
@ -2,8 +2,8 @@
|
|||||||
# Basic requirements
|
# Basic requirements
|
||||||
#
|
#
|
||||||
elasticsearch>=8.3,<9
|
elasticsearch>=8.3,<9
|
||||||
pandas>=1.2,<2
|
pandas>=1.5
|
||||||
matplotlib<4
|
matplotlib>=3.6
|
||||||
numpy<2
|
numpy<2
|
||||||
tqdm<5
|
tqdm<5
|
||||||
|
|
||||||
@ -12,7 +12,6 @@ tqdm<5
|
|||||||
#
|
#
|
||||||
scikit-learn>=0.22.1,<2
|
scikit-learn>=0.22.1,<2
|
||||||
xgboost>=0.90,<2
|
xgboost>=0.90,<2
|
||||||
scikit-learn>=0.22.1,<2
|
|
||||||
lightgbm>=2,<4
|
lightgbm>=2,<4
|
||||||
|
|
||||||
# PyTorch doesn't support Python 3.10 yet (pytorch/pytorch#66424)
|
# PyTorch doesn't support Python 3.10 yet (pytorch/pytorch#66424)
|
||||||
|
@ -2,6 +2,6 @@
|
|||||||
# Basic requirements
|
# Basic requirements
|
||||||
#
|
#
|
||||||
elasticsearch>=8.3,<9
|
elasticsearch>=8.3,<9
|
||||||
pandas>=1.2,<2
|
pandas>=1.5
|
||||||
matplotlib<4
|
matplotlib>=3.6
|
||||||
numpy<2
|
numpy<2
|
||||||
|
6
setup.py
6
setup.py
@ -83,12 +83,12 @@ setup(
|
|||||||
packages=find_packages(include=["eland", "eland.*"]),
|
packages=find_packages(include=["eland", "eland.*"]),
|
||||||
install_requires=[
|
install_requires=[
|
||||||
"elasticsearch>=8.3,<9",
|
"elasticsearch>=8.3,<9",
|
||||||
"pandas>=1.2,<2",
|
"pandas>=1.5",
|
||||||
"matplotlib<4",
|
"matplotlib>=3.6",
|
||||||
"numpy<2",
|
"numpy<2",
|
||||||
],
|
],
|
||||||
scripts=["bin/eland_import_hub_model"],
|
scripts=["bin/eland_import_hub_model"],
|
||||||
python_requires=">=3.7",
|
python_requires=">=3.8",
|
||||||
package_data={"eland": ["py.typed"]},
|
package_data={"eland": ["py.typed"]},
|
||||||
include_package_data=True,
|
include_package_data=True,
|
||||||
zip_safe=False,
|
zip_safe=False,
|
||||||
|
@ -15,6 +15,8 @@
|
|||||||
# specific language governing permissions and limitations
|
# specific language governing permissions and limitations
|
||||||
# under the License.
|
# under the License.
|
||||||
|
|
||||||
|
import gzip
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
|
|
||||||
@ -30,15 +32,21 @@ from tests import (
|
|||||||
ECOMMERCE_DF_FILE_NAME,
|
ECOMMERCE_DF_FILE_NAME,
|
||||||
ECOMMERCE_INDEX_NAME,
|
ECOMMERCE_INDEX_NAME,
|
||||||
ES_TEST_CLIENT,
|
ES_TEST_CLIENT,
|
||||||
FLIGHTS_DF_FILE_NAME,
|
FLIGHTS_FILE_NAME,
|
||||||
FLIGHTS_INDEX_NAME,
|
FLIGHTS_INDEX_NAME,
|
||||||
FLIGHTS_SMALL_INDEX_NAME,
|
FLIGHTS_SMALL_INDEX_NAME,
|
||||||
)
|
)
|
||||||
|
|
||||||
_pd_flights = pd.read_json(FLIGHTS_DF_FILE_NAME).sort_index()
|
_ed_flights = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME)
|
||||||
|
flight_records = []
|
||||||
|
with gzip.open(FLIGHTS_FILE_NAME) as f:
|
||||||
|
for json_obj in f:
|
||||||
|
flight_records.append(json.loads(json_obj))
|
||||||
|
_pd_flights = pd.DataFrame.from_records(flight_records).reindex(
|
||||||
|
_ed_flights.columns, axis=1
|
||||||
|
)
|
||||||
_pd_flights["timestamp"] = pd.to_datetime(_pd_flights["timestamp"])
|
_pd_flights["timestamp"] = pd.to_datetime(_pd_flights["timestamp"])
|
||||||
_pd_flights.index = _pd_flights.index.map(str) # make index 'object' not int
|
_pd_flights.index = _pd_flights.index.map(str) # make index 'object' not int
|
||||||
_ed_flights = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME)
|
|
||||||
|
|
||||||
_pd_flights_small = _pd_flights.head(48)
|
_pd_flights_small = _pd_flights.head(48)
|
||||||
_ed_flights_small = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_SMALL_INDEX_NAME)
|
_ed_flights_small = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_SMALL_INDEX_NAME)
|
||||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user