diff --git a/eland/mappings.py b/eland/mappings.py index fe2e03f..794c3ce 100644 --- a/eland/mappings.py +++ b/eland/mappings.py @@ -50,15 +50,18 @@ class Mappings: mappings: Mappings Object to copy """ + + # here we keep track of the format of any date fields + self._date_fields_format = {} if (client is not None) and (index_pattern is not None): get_mapping = client.get_mapping(index=index_pattern) # Get all fields (including all nested) and then all field_caps - all_fields = Mappings._extract_fields_from_mapping(get_mapping) + all_fields, self._date_fields_format = Mappings._extract_fields_from_mapping(get_mapping) all_fields_caps = client.field_caps(index=index_pattern, fields='*') # Get top level (not sub-field multifield) mappings - source_fields = Mappings._extract_fields_from_mapping(get_mapping, source_only=True) + source_fields, _ = Mappings._extract_fields_from_mapping(get_mapping, source_only=True) # Populate capability matrix of fields # field_name, es_dtype, pd_dtype, is_searchable, is_aggregtable, is_source @@ -76,7 +79,7 @@ class Mappings: self._source_field_pd_dtypes[field_name] = pd_dtype @staticmethod - def _extract_fields_from_mapping(mappings, source_only=False): + def _extract_fields_from_mapping(mappings, source_only=False, date_format=None): """ Extract all field names and types from a mapping. 
``` @@ -118,11 +121,14 @@ class Mappings: Returns ------- - fields: dict - Dict of field names and types + fields, dates_format: tuple(dict, dict) + where: + fields: Dict of field names and types + dates_format: Dict of date field names and format """ fields = {} + dates_format = {} # Recurse until we get a 'type: xxx' def flatten(x, name=''): @@ -131,7 +137,9 @@ class Mappings: if a == 'type' and type(x[a]) is str: # 'type' can be a name of a field field_name = name[:-1] field_type = x[a] - + # if field_type is 'date' keep track of the format info when available + if field_type == "date" and "format" in x: + dates_format[field_name] = x["format"] # If there is a conflicting type, warn - first values added wins if field_name in fields and fields[field_name] != field_type: warnings.warn("Field {} has conflicting types {} != {}". @@ -150,7 +158,7 @@ class Mappings: flatten(properties) - return fields + return fields, dates_format @staticmethod def _create_capability_matrix(all_fields, source_fields, all_fields_caps): @@ -367,6 +375,19 @@ class Mappings: """ return self._mappings_capabilities.loc[field_name] + def get_date_field_format(self, field_name): + """ + Parameters + ---------- + field_name: str + + Returns + ------- + str or None + The 'format' value recorded from this date field's mapping, or None if no format was recorded for field_name + """ + return self._date_fields_format.get(field_name) + def source_field_pd_dtype(self, field_name): """ Parameters diff --git a/eland/query_compiler.py b/eland/query_compiler.py index 07c93b3..b5bd00f 100644 --- a/eland/query_compiler.py +++ b/eland/query_compiler.py @@ -1,3 +1,6 @@ +import warnings +from typing import Union + import numpy as np import pandas as pd @@ -282,14 +285,10 @@ class ElandQueryCompiler: # Coerce types - for now just datetime if pd_dtype == 'datetime64[ns]': - # TODO - this doesn't work for certain ES date formats - # e.g.
"@timestamp" : { - # "type" : "date", - # "format" : "epoch_millis" - # } - # 1484053499256 - we need to check ES type and format and add conversions like: - # pd.to_datetime(x, unit='ms') - x = pd.to_datetime(x) + x = elasticsearch_date_to_pandas_date( + x, + self._mappings.get_date_field_format(field_name) + ) # Elasticsearch can have multiple values for a field. These are represented as lists, so # create lists for this pivot (see notes above) @@ -648,3 +647,131 @@ class ElandQueryCompiler: field_to_display_names=self._field_to_display_names.copy(), display_to_field_names=self._display_to_field_names.copy() )
+
+
+# strptime patterns for the built-in Elasticsearch date formats that exist both
+# as "<name>" and "strict_<name>"; both spellings are parsed with the same pattern.
+# Each value is (pattern, exact): `exact` is passed to pandas.to_datetime, where
+# exact=False allows the pattern to match anywhere in the value.
+_ES_PAIRED_DATE_FORMATS = {
+    "date": ("%Y-%m-%d", True),
+    "date_hour": ("%Y-%m-%dT%H", True),
+    "date_hour_minute": ("%Y-%m-%dT%H:%M", True),
+    "date_hour_minute_second": ("%Y-%m-%dT%H:%M:%S", True),
+    "date_hour_minute_second_fraction": ("%Y-%m-%dT%H:%M:%S.%f", False),
+    "date_hour_minute_second_millis": ("%Y-%m-%dT%H:%M:%S.%f", False),
+    "date_time": ("%Y-%m-%dT%H:%M:%S.%f%z", False),
+    "date_time_no_millis": ("%Y-%m-%dT%H:%M:%S%z", True),
+    "hour": ("%H", True),
+    "hour_minute": ("%H:%M", True),
+    "hour_minute_second": ("%H:%M:%S", True),
+    "hour_minute_second_fraction": ("%H:%M:%S.%f", False),
+    "hour_minute_second_millis": ("%H:%M:%S.%f", False),
+    "ordinal_date": ("%Y-%j", True),
+    "ordinal_date_time": ("%Y-%jT%H:%M:%S.%f%z", False),
+    "ordinal_date_time_no_millis": ("%Y-%jT%H:%M:%S%z", True),
+    "time": ("%H:%M:%S.%f%z", False),
+    "time_no_millis": ("%H:%M:%S%z", True),
+    "t_time": ("T%H:%M:%S.%f%z", False),
+    "t_time_no_millis": ("T%H:%M:%S%z", True),
+    "week_date": ("%G-W%V-%u", True),
+    "week_date_time": ("%G-W%V-%uT%H:%M:%S.%f%z", False),
+    "week_date_time_no_millis": ("%G-W%V-%uT%H:%M:%S%z", True),
+    "weekyear_week_day": ("%G-W%V-%u", True),
+    "year": ("%Y", True),
+    "year_month": ("%Y-%m", True),
+    "year_month_day": ("%Y-%m-%d", True),
+}
+
+# Formats handled under a single name; the paired formats are merged in below
+# under both of their names.
+_ES_DATE_FORMAT_PATTERNS = {
+    "strict_date_optional_time": ("%Y-%m-%dT%H:%M:%S.%f%z", False),
+    "basic_date": ("%Y%m%d", True),
+    "basic_date_time": ("%Y%m%dT%H%M%S.%f", False),
+    "basic_date_time_no_millis": ("%Y%m%dT%H%M%S%z", True),
+    "basic_ordinal_date": ("%Y%j", True),
+    "basic_ordinal_date_time": ("%Y%jT%H%M%S.%f%z", False),
+    "basic_ordinal_date_time_no_millis": ("%Y%jT%H%M%S%z", True),
+    "basic_time": ("%H%M%S.%f%z", False),
+    "basic_time_no_millis": ("%H%M%S%z", True),
+    "basic_t_time": ("T%H%M%S.%f%z", False),
+    "basic_t_time_no_millis": ("T%H%M%S%z", True),
+    "basic_week_date": ("%GW%V%u", True),
+    "basic_week_date_time": ("%GW%V%uT%H%M%S.%f%z", False),
+    "basic_week_date_time_no_millis": ("%GW%V%uT%H%M%S%z", True),
+}
+_ES_DATE_FORMAT_PATTERNS.update(_ES_PAIRED_DATE_FORMATS)
+_ES_DATE_FORMAT_PATTERNS.update(
+    {"strict_" + name: spec for name, spec in _ES_PAIRED_DATE_FORMATS.items()}
+)
+
+# Epoch formats map to the `unit` argument of pandas.to_datetime.
+_ES_EPOCH_UNITS = {"epoch_millis": "ms", "epoch_second": "s"}
+
+# Not supported in pandas
+# ValueError: ISO year directive '%G' must be used with the ISO week directive '%V'
+# and a weekday directive '%A', '%a', '%w', or '%u'.
+_ES_UNSUPPORTED_DATE_FORMATS = frozenset((
+    "strict_weekyear", "weekyear",
+    "strict_weekyear_week", "weekyear_week",
+))
+
+
+def elasticsearch_date_to_pandas_date(value: Union[int, str],
+                                      date_format: Union[str, None]) -> pd.Timestamp:
+    """
+    Convert one Elasticsearch date value to a pandas timestamp, using the
+    Elasticsearch date format declared for the field.
+
+    **Date Formats: https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-date-format.html#built-in-date-formats
+
+    Parameters
+    ----------
+    value: Union[int, str]
+        The date value.
+    date_format: Union[str, None]
+        The Elasticsearch date format (ex. 'epoch_millis', 'epoch_second', etc.),
+        or None when no format is known for the field.
+
+    Returns
+    -------
+    datetime: pd.Timestamp
+        From https://www.elastic.co/guide/en/elasticsearch/reference/current/date.html
+        Date formats can be customised, but if no format is specified then it uses the default:
+        "strict_date_optional_time||epoch_millis"
+        Therefore if no format is specified we assume either strict_date_optional_time
+        or epoch_millis.
+
+    Raises
+    ------
+    NotImplementedError
+        For the weekyear and weekyear_week formats (strict or not), which
+        pandas cannot parse.
+    """
+    if date_format is None:
+        # No explicit format: anything int() accepts is read as epoch
+        # milliseconds, everything else is left to pandas' own parsing.
+        try:
+            value = int(value)
+            return pd.to_datetime(value, unit='ms')
+        except ValueError:
+            return pd.to_datetime(value)
+
+    if date_format in _ES_EPOCH_UNITS:
+        return pd.to_datetime(value, unit=_ES_EPOCH_UNITS[date_format])
+
+    if date_format in _ES_DATE_FORMAT_PATTERNS:
+        pattern, exact = _ES_DATE_FORMAT_PATTERNS[date_format]
+        return pd.to_datetime(value, format=pattern, exact=exact)
+
+    if date_format in _ES_UNSUPPORTED_DATE_FORMATS:
+        # TODO investigate if there is a way of converting this
+        raise NotImplementedError(
+            "The '{}' format is not implemented due to lack of support in pandas".format(date_format))
+
+    warnings.warn("The '{}' format is not explicitly supported. "
+                  "Using pandas.to_datetime(value) to parse value".format(date_format),
+                  Warning)
+    # TODO investigate how we could generate this just once for a bulk read.
+    return pd.to_datetime(value)
diff --git a/eland/tests/dataframe/test_datetime_pytest.py b/eland/tests/dataframe/test_datetime_pytest.py index 20a59af..0762721 100644 --- a/eland/tests/dataframe/test_datetime_pytest.py +++ b/eland/tests/dataframe/test_datetime_pytest.py @@ -1,5 +1,7 @@ # File called _pytest for PyCharm compatability +from datetime import datetime +from elasticsearch import Elasticsearch import numpy as np import pandas as pd @@ -7,10 +9,53 @@ import eland as ed from eland.tests.common import ES_TEST_CLIENT from eland.tests.common import TestData from eland.tests.common import assert_pandas_eland_frame_equal +from eland.tests.common import assert_pandas_eland_series_equal class TestDataFrameDateTime(TestData): + times = ["2019-11-26T19:58:15.246+0000", + "1970-01-01T00:00:03.000+0000"] + time_index_name = 'test_time_formats' + + @classmethod + def setup_class(cls): + """ setup any state specific to the execution of the given class (which + usually contains tests).
+ """ + es = ES_TEST_CLIENT + if es.indices.exists(cls.time_index_name): + es.indices.delete(index=cls.time_index_name) + dts = [datetime.strptime(time, "%Y-%m-%dT%H:%M:%S.%f%z") + for time in cls.times] + + time_formats_docs = [TestDataFrameDateTime.get_time_values_from_datetime(dt) + for dt in dts] + mappings = {'properties': {}} + + for field_name, field_value in time_formats_docs[0].items(): + mappings['properties'][field_name] = {} + mappings['properties'][field_name]['type'] = 'date' + mappings['properties'][field_name]['format'] = field_name + + body = {"mappings": mappings} + index = 'test_time_formats' + es.indices.delete(index=index, ignore=[400, 404]) + es.indices.create(index=index, body=body) + + for i, time_formats in enumerate(time_formats_docs): + es.index(index=index, body=time_formats, id=i) + es.indices.refresh(index=index) + + @classmethod + def teardown_class(cls): + """ teardown any state that was previously setup with a call to + setup_class. + """ + + es = ES_TEST_CLIENT + es.indices.delete(index=cls.time_index_name) + def test_datetime_to_ms(self): df = pd.DataFrame(data={'A': np.random.rand(3), 'B': 1, @@ -41,3 +86,179 @@ class TestDataFrameDateTime(TestData): ed_df_head = ed_df.head() assert_pandas_eland_frame_equal(df, ed_df_head) + + def test_all_formats(self): + index_name = self.time_index_name + ed_df = ed.read_es(ES_TEST_CLIENT, index_name) + + for format_name in self.time_formats.keys(): + times = [pd.to_datetime(datetime.strptime(dt, "%Y-%m-%dT%H:%M:%S.%f%z") + .strftime(self.time_formats[format_name]), + format=self.time_formats[format_name]) + for dt in self.times] + + ed_series = ed_df[format_name] + pd_series = pd.Series(times, + index=[str(i) for i in range(len(self.times))], + name=format_name) + + assert_pandas_eland_series_equal(pd_series, ed_series) + + @staticmethod + def get_time_values_from_datetime(dt: datetime) -> dict: + time_formats = { + "epoch_millis": int(dt.timestamp() * 1000), + "epoch_second": 
int(dt.timestamp()), + "strict_date_optional_time": dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + dt.strftime("%z"), + "basic_date": dt.strftime("%Y%m%d"), + "basic_date_time": dt.strftime("%Y%m%dT%H%M%S.%f")[:-3] + dt.strftime("%z"), + "basic_date_time_no_millis": dt.strftime("%Y%m%dT%H%M%S%z"), + "basic_ordinal_date": dt.strftime("%Y%j"), + "basic_ordinal_date_time": dt.strftime("%Y%jT%H%M%S.%f")[:-3] + dt.strftime("%z"), + "basic_ordinal_date_time_no_millis": dt.strftime("%Y%jT%H%M%S%z"), + "basic_time": dt.strftime("%H%M%S.%f")[:-3] + dt.strftime("%z"), + "basic_time_no_millis": dt.strftime("%H%M%S%z"), + "basic_t_time": dt.strftime("T%H%M%S.%f")[:-3] + dt.strftime("%z"), + "basic_t_time_no_millis": dt.strftime("T%H%M%S%z"), + "basic_week_date": dt.strftime("%GW%V%u"), + "basic_week_date_time": dt.strftime("%GW%V%uT%H%M%S.%f")[:-3] + dt.strftime("%z"), + "basic_week_date_time_no_millis": dt.strftime("%GW%V%uT%H%M%S%z"), + "strict_date": dt.strftime("%Y-%m-%d"), + "date": dt.strftime("%Y-%m-%d"), + "strict_date_hour": dt.strftime("%Y-%m-%dT%H"), + "date_hour": dt.strftime("%Y-%m-%dT%H"), + "strict_date_hour_minute": dt.strftime("%Y-%m-%dT%H:%M"), + "date_hour_minute": dt.strftime("%Y-%m-%dT%H:%M"), + "strict_date_hour_minute_second": dt.strftime("%Y-%m-%dT%H:%M:%S"), + "date_hour_minute_second": dt.strftime("%Y-%m-%dT%H:%M:%S"), + "strict_date_hour_minute_second_fraction": dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3], + "date_hour_minute_second_fraction": dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3], + "strict_date_hour_minute_second_millis": dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3], + "date_hour_minute_second_millis": dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3], + "strict_date_time": dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + dt.strftime("%z"), + "date_time": dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + dt.strftime("%z"), + "strict_date_time_no_millis": dt.strftime("%Y-%m-%dT%H:%M:%S%z"), + "date_time_no_millis": dt.strftime("%Y-%m-%dT%H:%M:%S%z"), + "strict_hour": 
dt.strftime("%H"), + "hour": dt.strftime("%H"), + "strict_hour_minute": dt.strftime("%H:%M"), + "hour_minute": dt.strftime("%H:%M"), + "strict_hour_minute_second": dt.strftime("%H:%M:%S"), + "hour_minute_second": dt.strftime("%H:%M:%S"), + "strict_hour_minute_second_fraction": dt.strftime("%H:%M:%S.%f")[:-3], + "hour_minute_second_fraction": dt.strftime("%H:%M:%S.%f")[:-3], + "strict_hour_minute_second_millis": dt.strftime("%H:%M:%S.%f")[:-3], + "hour_minute_second_millis": dt.strftime("%H:%M:%S.%f")[:-3], + "strict_ordinal_date": dt.strftime("%Y-%j"), + "ordinal_date": dt.strftime("%Y-%j"), + "strict_ordinal_date_time": dt.strftime("%Y-%jT%H:%M:%S.%f")[:-3] + dt.strftime("%z"), + "ordinal_date_time": dt.strftime("%Y-%jT%H:%M:%S.%f")[:-3] + dt.strftime("%z"), + "strict_ordinal_date_time_no_millis": dt.strftime("%Y-%jT%H:%M:%S%z"), + "ordinal_date_time_no_millis": dt.strftime("%Y-%jT%H:%M:%S%z"), + "strict_time": dt.strftime("%H:%M:%S.%f")[:-3] + dt.strftime("%z"), + "time": dt.strftime("%H:%M:%S.%f")[:-3] + dt.strftime("%z"), + "strict_time_no_millis": dt.strftime("%H:%M:%S%z"), + "time_no_millis": dt.strftime("%H:%M:%S%z"), + "strict_t_time": dt.strftime("T%H:%M:%S.%f")[:-3] + dt.strftime("%z"), + "t_time": dt.strftime("T%H:%M:%S.%f")[:-3] + dt.strftime("%z"), + "strict_t_time_no_millis": dt.strftime("T%H:%M:%S%z"), + "t_time_no_millis": dt.strftime("T%H:%M:%S%z"), + "strict_week_date": dt.strftime("%G-W%V-%u"), + "week_date": dt.strftime("%G-W%V-%u"), + "strict_week_date_time": dt.strftime("%G-W%V-%uT%H:%M:%S.%f")[:-3] + dt.strftime("%z"), + "week_date_time": dt.strftime("%G-W%V-%uT%H:%M:%S.%f")[:-3] + dt.strftime("%z"), + "strict_week_date_time_no_millis": dt.strftime("%G-W%V-%uT%H:%M:%S%z"), + "week_date_time_no_millis": dt.strftime("%G-W%V-%uT%H:%M:%S%z"), + "strict_weekyear": dt.strftime("%G"), + "weekyear": dt.strftime("%G"), + "strict_weekyear_week": dt.strftime("%G-W%V"), + "weekyear_week": dt.strftime("%G-W%V"), + "strict_weekyear_week_day": 
dt.strftime("%G-W%V-%u"), + "weekyear_week_day": dt.strftime("%G-W%V-%u"), + "strict_year": dt.strftime("%Y"), + "year": dt.strftime("%Y"), + "strict_year_month": dt.strftime("%Y-%m"), + "year_month": dt.strftime("%Y-%m"), + "strict_year_month_day": dt.strftime("%Y-%m-%d"), + "year_month_day": dt.strftime("%Y-%m-%d"), + } + + return time_formats + + time_formats = { + "epoch_millis": "%Y-%m-%dT%H:%M:%S.%f", + "epoch_second": "%Y-%m-%dT%H:%M:%S", + "strict_date_optional_time": "%Y-%m-%dT%H:%M:%S.%f%z", + "basic_date": "%Y%m%d", + "basic_date_time": "%Y%m%dT%H%M%S.%f", + "basic_date_time_no_millis": "%Y%m%dT%H%M%S%z", + "basic_ordinal_date": "%Y%j", + "basic_ordinal_date_time": "%Y%jT%H%M%S.%f%z", + "basic_ordinal_date_time_no_millis": "%Y%jT%H%M%S%z", + "basic_time": "%H%M%S.%f%z", + "basic_time_no_millis": "%H%M%S%z", + "basic_t_time": "T%H%M%S.%f%z", + "basic_t_time_no_millis": "T%H%M%S%z", + "basic_week_date": "%GW%V%u", + "basic_week_date_time": "%GW%V%uT%H%M%S.%f%z", + "basic_week_date_time_no_millis": "%GW%V%uT%H%M%S%z", + "date": "%Y-%m-%d", + "strict_date": "%Y-%m-%d", + "strict_date_hour": "%Y-%m-%dT%H", + "date_hour": "%Y-%m-%dT%H", + "strict_date_hour_minute": "%Y-%m-%dT%H:%M", + "date_hour_minute": "%Y-%m-%dT%H:%M", + "strict_date_hour_minute_second": "%Y-%m-%dT%H:%M:%S", + "date_hour_minute_second": "%Y-%m-%dT%H:%M:%S", + "strict_date_hour_minute_second_fraction": "%Y-%m-%dT%H:%M:%S.%f", + "date_hour_minute_second_fraction": "%Y-%m-%dT%H:%M:%S.%f", + "strict_date_hour_minute_second_millis": "%Y-%m-%dT%H:%M:%S.%f", + "date_hour_minute_second_millis": "%Y-%m-%dT%H:%M:%S.%f", + "strict_date_time": "%Y-%m-%dT%H:%M:%S.%f%z", + "date_time": "%Y-%m-%dT%H:%M:%S.%f%z", + "strict_date_time_no_millis": "%Y-%m-%dT%H:%M:%S%z", + "date_time_no_millis": "%Y-%m-%dT%H:%M:%S%z", + "strict_hour": "%H", + "hour": "%H", + "strict_hour_minute": "%H:%M", + "hour_minute": "%H:%M", + "strict_hour_minute_second": "%H:%M:%S", + "hour_minute_second": "%H:%M:%S", + 
"strict_hour_minute_second_fraction": "%H:%M:%S.%f", + "hour_minute_second_fraction": "%H:%M:%S.%f", + "strict_hour_minute_second_millis": "%H:%M:%S.%f", + "hour_minute_second_millis": "%H:%M:%S.%f", + "strict_ordinal_date": "%Y-%j", + "ordinal_date": "%Y-%j", + "strict_ordinal_date_time": "%Y-%jT%H:%M:%S.%f%z", + "ordinal_date_time": "%Y-%jT%H:%M:%S.%f%z", + "strict_ordinal_date_time_no_millis": "%Y-%jT%H:%M:%S%z", + "ordinal_date_time_no_millis": "%Y-%jT%H:%M:%S%z", + "strict_time": "%H:%M:%S.%f%z", + "time": "%H:%M:%S.%f%z", + "strict_time_no_millis": "%H:%M:%S%z", + "time_no_millis": "%H:%M:%S%z", + "strict_t_time": "T%H:%M:%S.%f%z", + "t_time": "T%H:%M:%S.%f%z", + "strict_t_time_no_millis": "T%H:%M:%S%z", + "t_time_no_millis": "T%H:%M:%S%z", + "strict_week_date": "%G-W%V-%u", + "week_date": "%G-W%V-%u", + "strict_week_date_time": "%G-W%V-%uT%H:%M:%S.%f%z", + "week_date_time": "%G-W%V-%uT%H:%M:%S.%f%z", + "strict_week_date_time_no_millis": "%G-W%V-%uT%H:%M:%S%z", + "week_date_time_no_millis": "%G-W%V-%uT%H:%M:%S%z", + "strict_weekyear_week_day": "%G-W%V-%u", + "weekyear_week_day": "%G-W%V-%u", + "strict_year": "%Y", + "year": "%Y", + "strict_year_month": "%Y-%m", + "year_month": "%Y-%m", + "strict_year_month_day": "%Y-%m-%d", + "year_month_day": "%Y-%m-%d" + } + + # excluding these formats as pandas throws a ValueError + # "strict_weekyear": ("%G", None) - not supported in pandas + # "strict_weekyear_week": ("%G-W%V", None), + # E ValueError: ISO year directive '%G' must be used with the ISO week directive '%V' and a weekday directive '%A', '%a', '%w', or '%u'. 
diff --git a/eland/tests/setup_tests.py b/eland/tests/setup_tests.py index 5fc5d34..43efea1 100644 --- a/eland/tests/setup_tests.py +++ b/eland/tests/setup_tests.py @@ -51,6 +51,7 @@ def _setup_data(es): print("Done", index_name) + def _update_max_compilations_limit(es, limit="10000/1m"): print('Updating script.max_compilations_rate to ', limit) cluster_client = ClusterClient(es) @@ -79,7 +80,6 @@ if __name__ == '__main__': # Create connection to Elasticsearch - use defaults print('Connecting to ES', ELASTICSEARCH_HOST) es = ES_TEST_CLIENT - _setup_data(es) _setup_test_mappings(es)