diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 76ad861..4b4e35b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,6 +31,8 @@ jobs: with: python-version: 3.8 - name: Install dependencies - run: python3.8 -m pip install nox + run: | + sudo apt-get install --yes pandoc + python3.8 -m pip install nox - name: Build documentation run: nox -s docs diff --git a/eland/field_mappings.py b/eland/field_mappings.py index 7226d41..00cb2f2 100644 --- a/eland/field_mappings.py +++ b/eland/field_mappings.py @@ -42,8 +42,6 @@ from pandas.core.dtypes.inference import is_list_like if TYPE_CHECKING: from elasticsearch import Elasticsearch - from eland import DataFrame - ES_FLOAT_TYPES: Set[str] = {"double", "float", "half_float", "scaled_float"} ES_INTEGER_TYPES: Set[str] = {"long", "integer", "short", "byte"} @@ -463,7 +461,7 @@ class FieldMappings: return cls.ES_DTYPE_TO_PD_DTYPE.get(es_dtype, "object") @staticmethod - def _pd_dtype_to_es_dtype(pd_dtype): + def _pd_dtype_to_es_dtype(pd_dtype) -> Optional[str]: """ Mapping pandas dtypes to Elasticsearch dtype -------------------------------------------- @@ -479,7 +477,7 @@ class FieldMappings: category NA NA Finite list of text values ``` """ - es_dtype = None + es_dtype: Optional[str] = None # Map all to 64-bit - TODO map to specifics: int32 -> int etc. if is_float_dtype(pd_dtype): @@ -501,7 +499,7 @@ class FieldMappings: @staticmethod def _generate_es_mappings( - dataframe: "DataFrame", es_type_overrides: Optional[Mapping[str, str]] = None + dataframe: "pd.DataFrame", es_type_overrides: Optional[Mapping[str, str]] = None ) -> Dict[str, Dict[str, Dict[str, Any]]]: """Given a pandas dataframe, generate the associated Elasticsearch mapping @@ -536,8 +534,19 @@ class FieldMappings: } } """ + es_dtype: str + + mapping_props: Dict[str, Any] = {} + + if es_type_overrides is not None: + non_existing_columns: List[str] = [ + key for key in es_type_overrides.keys() if key not in dataframe.columns + ] + if non_existing_columns: + raise KeyError( + f"{repr(non_existing_columns)[1:-1]} column(s) not in given dataframe" + ) - mapping_props = {} for column, dtype in dataframe.dtypes.iteritems(): if es_type_overrides is not None and column in es_type_overrides: es_dtype = es_type_overrides[column] diff --git a/eland/tests/dataframe/test_utils_pytest.py b/eland/tests/dataframe/test_utils_pytest.py index e1b57c3..87586ee 100644 --- a/eland/tests/dataframe/test_utils_pytest.py +++ b/eland/tests/dataframe/test_utils_pytest.py @@ -19,6 +19,7 @@ import numpy as np import pandas as pd +import pytest import eland as ed from eland.field_mappings import FieldMappings @@ -139,3 +140,28 @@ class TestDataFrameUtils(TestData): # This test calls the same method so is redundant # assert_pandas_eland_frame_equal(pd_df, self.ed_flights()) + + def test_es_type_override_error(self): + + df = self.pd_flights().filter( + ["AvgTicketPrice", "Cancelled", "dayOfWeek", "timestamp", "DestCountry"] + ) + + index_name = "test_es_type_override" + + match = "'DistanceKilometers', 'DistanceMiles' column(s) not in given dataframe" + with pytest.raises(KeyError, match=match): + ed.pandas_to_eland( + df, + ES_TEST_CLIENT, + index_name, + es_if_exists="replace", + es_refresh=True, + use_pandas_index_for_es_ids=False, + es_type_overrides={ + "AvgTicketPrice": "long", + "DistanceKilometers": "text", + "DistanceMiles": "text", + }, + ) + ES_TEST_CLIENT.indices.delete(index=index_name) diff --git a/noxfile.py b/noxfile.py index 3f3a170..a515ce3 100644 --- a/noxfile.py +++ b/noxfile.py @@ -19,7 +19,6 @@ import os import subprocess from pathlib import Path -import elasticsearch import nox BASE_DIR = Path(__file__).parent @@ -126,6 +125,8 @@ def docs(session): # See if we have an Elasticsearch cluster active # to rebuild the Jupyter notebooks with. try: + import elasticsearch + es = elasticsearch.Elasticsearch("localhost:9200") es.info() if not es.indices.exists("flights"):