Error when es_type_overrides receives unknown columns

This commit is contained in:
P. Sai Vinay 2020-10-28 00:18:31 +05:30 committed by GitHub
parent 28951c0ad1
commit e17b4e03ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 46 additions and 8 deletions

View File

@ -31,6 +31,8 @@ jobs:
with:
python-version: 3.8
- name: Install dependencies
run: python3.8 -m pip install nox
run: |
sudo apt-get install --yes pandoc
python3.8 -m pip install nox
- name: Build documentation
run: nox -s docs

View File

@ -42,8 +42,6 @@ from pandas.core.dtypes.inference import is_list_like
if TYPE_CHECKING:
from elasticsearch import Elasticsearch
from eland import DataFrame
ES_FLOAT_TYPES: Set[str] = {"double", "float", "half_float", "scaled_float"}
ES_INTEGER_TYPES: Set[str] = {"long", "integer", "short", "byte"}
@ -463,7 +461,7 @@ class FieldMappings:
return cls.ES_DTYPE_TO_PD_DTYPE.get(es_dtype, "object")
@staticmethod
def _pd_dtype_to_es_dtype(pd_dtype):
def _pd_dtype_to_es_dtype(pd_dtype) -> Optional[str]:
"""
Mapping pandas dtypes to Elasticsearch dtype
--------------------------------------------
@ -479,7 +477,7 @@ class FieldMappings:
category NA NA Finite list of text values
```
"""
es_dtype = None
es_dtype: Optional[str] = None
# Map all to 64-bit - TODO map to specifics: int32 -> int etc.
if is_float_dtype(pd_dtype):
@ -501,7 +499,7 @@ class FieldMappings:
@staticmethod
def _generate_es_mappings(
dataframe: "DataFrame", es_type_overrides: Optional[Mapping[str, str]] = None
dataframe: "pd.DataFrame", es_type_overrides: Optional[Mapping[str, str]] = None
) -> Dict[str, Dict[str, Dict[str, Any]]]:
"""Given a pandas dataframe, generate the associated Elasticsearch mapping
@ -536,8 +534,19 @@ class FieldMappings:
}
}
"""
es_dtype: str
mapping_props: Dict[str, Any] = {}
if es_type_overrides is not None:
non_existing_columns: List[str] = [
key for key in es_type_overrides.keys() if key not in dataframe.columns
]
if non_existing_columns:
raise KeyError(
f"{repr(non_existing_columns)[1:-1]} column(s) not in given dataframe"
)
mapping_props = {}
for column, dtype in dataframe.dtypes.iteritems():
if es_type_overrides is not None and column in es_type_overrides:
es_dtype = es_type_overrides[column]

View File

@ -19,6 +19,7 @@
import numpy as np
import pandas as pd
import pytest
import eland as ed
from eland.field_mappings import FieldMappings
@ -139,3 +140,28 @@ class TestDataFrameUtils(TestData):
# This test calls the same method so is redundant
# assert_pandas_eland_frame_equal(pd_df, self.ed_flights())
def test_es_type_override_error(self):
    """Check that unknown columns in ``es_type_overrides`` raise ``KeyError``.

    The DataFrame is narrowed to five columns, then ``pandas_to_eland`` is
    called with overrides for two columns that were filtered out
    (``DistanceKilometers`` and ``DistanceMiles``). The call must fail with a
    ``KeyError`` whose message lists exactly those two columns.
    """
    # Imported locally so the test does not depend on the module-level imports.
    import re

    df = self.pd_flights().filter(
        ["AvgTicketPrice", "Cancelled", "dayOfWeek", "timestamp", "DestCountry"]
    )

    index_name = "test_es_type_override"

    # pytest.raises(match=...) applies re.search(), so the message must be
    # escaped: an unescaped "(s)" is a capture group that matches "columns",
    # never the literal "column(s)" text this message contains.
    match = re.escape(
        "'DistanceKilometers', 'DistanceMiles' column(s) not in given dataframe"
    )

    try:
        with pytest.raises(KeyError, match=match):
            ed.pandas_to_eland(
                df,
                ES_TEST_CLIENT,
                index_name,
                es_if_exists="replace",
                es_refresh=True,
                use_pandas_index_for_es_ids=False,
                es_type_overrides={
                    "AvgTicketPrice": "long",
                    "DistanceKilometers": "text",
                    "DistanceMiles": "text",
                },
            )
    finally:
        # Always clean up, including when the call unexpectedly succeeds and
        # creates the index. Whether the index exists after the expected
        # failure is not visible from this test, so a missing index is
        # tolerated instead of turning cleanup into a second error.
        ES_TEST_CLIENT.indices.delete(index=index_name, ignore_unavailable=True)

View File

@ -19,7 +19,6 @@ import os
import subprocess
from pathlib import Path
import elasticsearch
import nox
BASE_DIR = Path(__file__).parent
@ -126,6 +125,8 @@ def docs(session):
# See if we have an Elasticsearch cluster active
# to rebuild the Jupyter notebooks with.
try:
import elasticsearch
es = elasticsearch.Elasticsearch("localhost:9200")
es.info()
if not es.indices.exists("flights"):