mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Error when es_type_overrides receives unknown columns
This commit is contained in:
parent
28951c0ad1
commit
e17b4e03ea
4
.github/workflows/ci.yml
vendored
4
.github/workflows/ci.yml
vendored
@ -31,6 +31,8 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
python-version: 3.8
|
python-version: 3.8
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: python3.8 -m pip install nox
|
run: |
|
||||||
|
sudo apt-get install --yes pandoc
|
||||||
|
python3.8 -m pip install nox
|
||||||
- name: Build documentation
|
- name: Build documentation
|
||||||
run: nox -s docs
|
run: nox -s docs
|
||||||
|
@ -42,8 +42,6 @@ from pandas.core.dtypes.inference import is_list_like
|
|||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from elasticsearch import Elasticsearch
|
from elasticsearch import Elasticsearch
|
||||||
|
|
||||||
from eland import DataFrame
|
|
||||||
|
|
||||||
|
|
||||||
ES_FLOAT_TYPES: Set[str] = {"double", "float", "half_float", "scaled_float"}
|
ES_FLOAT_TYPES: Set[str] = {"double", "float", "half_float", "scaled_float"}
|
||||||
ES_INTEGER_TYPES: Set[str] = {"long", "integer", "short", "byte"}
|
ES_INTEGER_TYPES: Set[str] = {"long", "integer", "short", "byte"}
|
||||||
@ -463,7 +461,7 @@ class FieldMappings:
|
|||||||
return cls.ES_DTYPE_TO_PD_DTYPE.get(es_dtype, "object")
|
return cls.ES_DTYPE_TO_PD_DTYPE.get(es_dtype, "object")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _pd_dtype_to_es_dtype(pd_dtype):
|
def _pd_dtype_to_es_dtype(pd_dtype) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
Mapping pandas dtypes to Elasticsearch dtype
|
Mapping pandas dtypes to Elasticsearch dtype
|
||||||
--------------------------------------------
|
--------------------------------------------
|
||||||
@ -479,7 +477,7 @@ class FieldMappings:
|
|||||||
category NA NA Finite list of text values
|
category NA NA Finite list of text values
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
es_dtype = None
|
es_dtype: Optional[str] = None
|
||||||
|
|
||||||
# Map all to 64-bit - TODO map to specifics: int32 -> int etc.
|
# Map all to 64-bit - TODO map to specifics: int32 -> int etc.
|
||||||
if is_float_dtype(pd_dtype):
|
if is_float_dtype(pd_dtype):
|
||||||
@ -501,7 +499,7 @@ class FieldMappings:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _generate_es_mappings(
|
def _generate_es_mappings(
|
||||||
dataframe: "DataFrame", es_type_overrides: Optional[Mapping[str, str]] = None
|
dataframe: "pd.DataFrame", es_type_overrides: Optional[Mapping[str, str]] = None
|
||||||
) -> Dict[str, Dict[str, Dict[str, Any]]]:
|
) -> Dict[str, Dict[str, Dict[str, Any]]]:
|
||||||
"""Given a pandas dataframe, generate the associated Elasticsearch mapping
|
"""Given a pandas dataframe, generate the associated Elasticsearch mapping
|
||||||
|
|
||||||
@ -536,8 +534,19 @@ class FieldMappings:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
es_dtype: str
|
||||||
|
|
||||||
|
mapping_props: Dict[str, Any] = {}
|
||||||
|
|
||||||
|
if es_type_overrides is not None:
|
||||||
|
non_existing_columns: List[str] = [
|
||||||
|
key for key in es_type_overrides.keys() if key not in dataframe.columns
|
||||||
|
]
|
||||||
|
if non_existing_columns:
|
||||||
|
raise KeyError(
|
||||||
|
f"{repr(non_existing_columns)[1:-1]} column(s) not in given dataframe"
|
||||||
|
)
|
||||||
|
|
||||||
mapping_props = {}
|
|
||||||
for column, dtype in dataframe.dtypes.iteritems():
|
for column, dtype in dataframe.dtypes.iteritems():
|
||||||
if es_type_overrides is not None and column in es_type_overrides:
|
if es_type_overrides is not None and column in es_type_overrides:
|
||||||
es_dtype = es_type_overrides[column]
|
es_dtype = es_type_overrides[column]
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import pytest
|
||||||
|
|
||||||
import eland as ed
|
import eland as ed
|
||||||
from eland.field_mappings import FieldMappings
|
from eland.field_mappings import FieldMappings
|
||||||
@ -139,3 +140,28 @@ class TestDataFrameUtils(TestData):
|
|||||||
|
|
||||||
# This test calls the same method so is redundant
|
# This test calls the same method so is redundant
|
||||||
# assert_pandas_eland_frame_equal(pd_df, self.ed_flights())
|
# assert_pandas_eland_frame_equal(pd_df, self.ed_flights())
|
||||||
|
|
||||||
|
def test_es_type_override_error(self):
|
||||||
|
|
||||||
|
df = self.pd_flights().filter(
|
||||||
|
["AvgTicketPrice", "Cancelled", "dayOfWeek", "timestamp", "DestCountry"]
|
||||||
|
)
|
||||||
|
|
||||||
|
index_name = "test_es_type_override"
|
||||||
|
|
||||||
|
match = "'DistanceKilometers', 'DistanceMiles' column(s) not in given dataframe"
|
||||||
|
with pytest.raises(KeyError, match=match):
|
||||||
|
ed.pandas_to_eland(
|
||||||
|
df,
|
||||||
|
ES_TEST_CLIENT,
|
||||||
|
index_name,
|
||||||
|
es_if_exists="replace",
|
||||||
|
es_refresh=True,
|
||||||
|
use_pandas_index_for_es_ids=False,
|
||||||
|
es_type_overrides={
|
||||||
|
"AvgTicketPrice": "long",
|
||||||
|
"DistanceKilometers": "text",
|
||||||
|
"DistanceMiles": "text",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
ES_TEST_CLIENT.indices.delete(index=index_name)
|
||||||
|
@ -19,7 +19,6 @@ import os
|
|||||||
import subprocess
|
import subprocess
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import elasticsearch
|
|
||||||
import nox
|
import nox
|
||||||
|
|
||||||
BASE_DIR = Path(__file__).parent
|
BASE_DIR = Path(__file__).parent
|
||||||
@ -126,6 +125,8 @@ def docs(session):
|
|||||||
# See if we have an Elasticsearch cluster active
|
# See if we have an Elasticsearch cluster active
|
||||||
# to rebuild the Jupyter notebooks with.
|
# to rebuild the Jupyter notebooks with.
|
||||||
try:
|
try:
|
||||||
|
import elasticsearch
|
||||||
|
|
||||||
es = elasticsearch.Elasticsearch("localhost:9200")
|
es = elasticsearch.Elasticsearch("localhost:9200")
|
||||||
es.info()
|
es.info()
|
||||||
if not es.indices.exists("flights"):
|
if not es.indices.exists("flights"):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user