mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Error when es_type_overrides receives unknown columns
This commit is contained in:
parent
28951c0ad1
commit
e17b4e03ea
4
.github/workflows/ci.yml
vendored
4
.github/workflows/ci.yml
vendored
@ -31,6 +31,8 @@ jobs:
|
||||
with:
|
||||
python-version: 3.8
|
||||
- name: Install dependencies
|
||||
run: python3.8 -m pip install nox
|
||||
run: |
|
||||
sudo apt-get install --yes pandoc
|
||||
python3.8 -m pip install nox
|
||||
- name: Build documentation
|
||||
run: nox -s docs
|
||||
|
@ -42,8 +42,6 @@ from pandas.core.dtypes.inference import is_list_like
|
||||
if TYPE_CHECKING:
|
||||
from elasticsearch import Elasticsearch
|
||||
|
||||
from eland import DataFrame
|
||||
|
||||
|
||||
ES_FLOAT_TYPES: Set[str] = {"double", "float", "half_float", "scaled_float"}
|
||||
ES_INTEGER_TYPES: Set[str] = {"long", "integer", "short", "byte"}
|
||||
@ -463,7 +461,7 @@ class FieldMappings:
|
||||
return cls.ES_DTYPE_TO_PD_DTYPE.get(es_dtype, "object")
|
||||
|
||||
@staticmethod
|
||||
def _pd_dtype_to_es_dtype(pd_dtype):
|
||||
def _pd_dtype_to_es_dtype(pd_dtype) -> Optional[str]:
|
||||
"""
|
||||
Mapping pandas dtypes to Elasticsearch dtype
|
||||
--------------------------------------------
|
||||
@ -479,7 +477,7 @@ class FieldMappings:
|
||||
category NA NA Finite list of text values
|
||||
```
|
||||
"""
|
||||
es_dtype = None
|
||||
es_dtype: Optional[str] = None
|
||||
|
||||
# Map all to 64-bit - TODO map to specifics: int32 -> int etc.
|
||||
if is_float_dtype(pd_dtype):
|
||||
@ -501,7 +499,7 @@ class FieldMappings:
|
||||
|
||||
@staticmethod
|
||||
def _generate_es_mappings(
|
||||
dataframe: "DataFrame", es_type_overrides: Optional[Mapping[str, str]] = None
|
||||
dataframe: "pd.DataFrame", es_type_overrides: Optional[Mapping[str, str]] = None
|
||||
) -> Dict[str, Dict[str, Dict[str, Any]]]:
|
||||
"""Given a pandas dataframe, generate the associated Elasticsearch mapping
|
||||
|
||||
@ -536,8 +534,19 @@ class FieldMappings:
|
||||
}
|
||||
}
|
||||
"""
|
||||
es_dtype: str
|
||||
|
||||
mapping_props: Dict[str, Any] = {}
|
||||
|
||||
if es_type_overrides is not None:
|
||||
non_existing_columns: List[str] = [
|
||||
key for key in es_type_overrides.keys() if key not in dataframe.columns
|
||||
]
|
||||
if non_existing_columns:
|
||||
raise KeyError(
|
||||
f"{repr(non_existing_columns)[1:-1]} column(s) not in given dataframe"
|
||||
)
|
||||
|
||||
mapping_props = {}
|
||||
for column, dtype in dataframe.dtypes.iteritems():
|
||||
if es_type_overrides is not None and column in es_type_overrides:
|
||||
es_dtype = es_type_overrides[column]
|
||||
|
@ -19,6 +19,7 @@
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
import eland as ed
|
||||
from eland.field_mappings import FieldMappings
|
||||
@ -139,3 +140,28 @@ class TestDataFrameUtils(TestData):
|
||||
|
||||
# This test calls the same method so is redundant
|
||||
# assert_pandas_eland_frame_equal(pd_df, self.ed_flights())
|
||||
|
||||
def test_es_type_override_error(self):
|
||||
|
||||
df = self.pd_flights().filter(
|
||||
["AvgTicketPrice", "Cancelled", "dayOfWeek", "timestamp", "DestCountry"]
|
||||
)
|
||||
|
||||
index_name = "test_es_type_override"
|
||||
|
||||
match = "'DistanceKilometers', 'DistanceMiles' column(s) not in given dataframe"
|
||||
with pytest.raises(KeyError, match=match):
|
||||
ed.pandas_to_eland(
|
||||
df,
|
||||
ES_TEST_CLIENT,
|
||||
index_name,
|
||||
es_if_exists="replace",
|
||||
es_refresh=True,
|
||||
use_pandas_index_for_es_ids=False,
|
||||
es_type_overrides={
|
||||
"AvgTicketPrice": "long",
|
||||
"DistanceKilometers": "text",
|
||||
"DistanceMiles": "text",
|
||||
},
|
||||
)
|
||||
ES_TEST_CLIENT.indices.delete(index=index_name)
|
||||
|
@ -19,7 +19,6 @@ import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import elasticsearch
|
||||
import nox
|
||||
|
||||
BASE_DIR = Path(__file__).parent
|
||||
@ -126,6 +125,8 @@ def docs(session):
|
||||
# See if we have an Elasticsearch cluster active
|
||||
# to rebuild the Jupyter notebooks with.
|
||||
try:
|
||||
import elasticsearch
|
||||
|
||||
es = elasticsearch.Elasticsearch("localhost:9200")
|
||||
es.info()
|
||||
if not es.indices.exists("flights"):
|
||||
|
Loading…
x
Reference in New Issue
Block a user