Added support for 2 date formats: (#70)

* Adds support for multiple date formats
This commit is contained in:
Francesco Vigliaturo 2019-12-04 17:42:50 +01:00 committed by GitHub
parent 13141645f7
commit 99bfea42b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 449 additions and 16 deletions

View File

@ -50,15 +50,18 @@ class Mappings:
mappings: Mappings
Object to copy
"""
# here we keep track of the format of any date fields
self._date_fields_format = {}
if (client is not None) and (index_pattern is not None):
get_mapping = client.get_mapping(index=index_pattern)
# Get all fields (including all nested) and then all field_caps
all_fields = Mappings._extract_fields_from_mapping(get_mapping)
all_fields, self._date_fields_format = Mappings._extract_fields_from_mapping(get_mapping)
all_fields_caps = client.field_caps(index=index_pattern, fields='*')
# Get top level (not sub-field multifield) mappings
source_fields = Mappings._extract_fields_from_mapping(get_mapping, source_only=True)
source_fields, _ = Mappings._extract_fields_from_mapping(get_mapping, source_only=True)
# Populate capability matrix of fields
# field_name, es_dtype, pd_dtype, is_searchable, is_aggregatable, is_source
@ -76,7 +79,7 @@ class Mappings:
self._source_field_pd_dtypes[field_name] = pd_dtype
@staticmethod
def _extract_fields_from_mapping(mappings, source_only=False):
def _extract_fields_from_mapping(mappings, source_only=False, date_format=None):
"""
Extract all field names and types from a mapping.
```
@ -118,11 +121,14 @@ class Mappings:
Returns
-------
fields: dict
Dict of field names and types
fields, dates_format: tuple(dict, dict)
where:
fields: Dict of field names and types
dates_format: Dict of date field names and format
"""
fields = {}
dates_format = {}
# Recurse until we get a 'type: xxx'
def flatten(x, name=''):
@ -131,7 +137,9 @@ class Mappings:
if a == 'type' and type(x[a]) is str: # 'type' can be a name of a field
field_name = name[:-1]
field_type = x[a]
# if field_type is 'date' keep track of the format info when available
if field_type == "date" and "format" in x:
dates_format[field_name] = x["format"]
# If there is a conflicting type, warn - first values added wins
if field_name in fields and fields[field_name] != field_type:
warnings.warn("Field {} has conflicting types {} != {}".
@ -150,7 +158,7 @@ class Mappings:
flatten(properties)
return fields
return fields, dates_format
@staticmethod
def _create_capability_matrix(all_fields, source_fields, all_fields_caps):
@ -367,6 +375,19 @@ class Mappings:
"""
return self._mappings_capabilities.loc[field_name]
def get_date_field_format(self, field_name):
    """
    Look up the date format recorded for a field.

    Parameters
    ----------
    field_name: str
        Name of the field to look up

    Returns
    -------
    str or None
        The entry stored for ``field_name`` in ``self._date_fields_format``,
        or None when the field has no entry there
    """
    known_formats = self._date_fields_format
    return known_formats.get(field_name)
def source_field_pd_dtype(self, field_name):
"""
Parameters

View File

@ -1,3 +1,6 @@
import warnings
from typing import Union
import numpy as np
import pandas as pd
@ -282,14 +285,10 @@ class ElandQueryCompiler:
# Coerce types - for now just datetime
if pd_dtype == 'datetime64[ns]':
# TODO - this doesn't work for certain ES date formats
# e.g. "@timestamp" : {
# "type" : "date",
# "format" : "epoch_millis"
# }
# 1484053499256 - we need to check ES type and format and add conversions like:
# pd.to_datetime(x, unit='ms')
x = pd.to_datetime(x)
x = elasticsearch_date_to_pandas_date(
x,
self._mappings.get_date_field_format(field_name)
)
# Elasticsearch can have multiple values for a field. These are represented as lists, so
# create lists for this pivot (see notes above)
@ -648,3 +647,195 @@ class ElandQueryCompiler:
field_to_display_names=self._field_to_display_names.copy(),
display_to_field_names=self._display_to_field_names.copy()
)
# Elasticsearch epoch formats and the pandas ``unit`` each one maps to.
_ES_EPOCH_UNITS = {
    "epoch_millis": "ms",
    "epoch_second": "s",
}

# Formats that cannot be parsed: pandas rejects the ISO year directive '%G'
# unless it is combined with '%V' and a weekday directive.
_ES_UNSUPPORTED_DATE_FORMATS = frozenset({
    "strict_weekyear",
    "weekyear",
    "strict_weekyear_week",
    "weekyear_week",
})

# strptime patterns for built-in formats that exist both with and without a
# "strict_" prefix; both spellings are parsed with the same pattern.
_ES_STRICT_PAIRED_DATE_PATTERNS = {
    "date": "%Y-%m-%d",
    "date_hour": "%Y-%m-%dT%H",
    "date_hour_minute": "%Y-%m-%dT%H:%M",
    "date_hour_minute_second": "%Y-%m-%dT%H:%M:%S",
    "date_hour_minute_second_fraction": "%Y-%m-%dT%H:%M:%S.%f",
    "date_hour_minute_second_millis": "%Y-%m-%dT%H:%M:%S.%f",
    "date_time": "%Y-%m-%dT%H:%M:%S.%f%z",
    "date_time_no_millis": "%Y-%m-%dT%H:%M:%S%z",
    "hour": "%H",
    "hour_minute": "%H:%M",
    "hour_minute_second": "%H:%M:%S",
    "hour_minute_second_fraction": "%H:%M:%S.%f",
    "hour_minute_second_millis": "%H:%M:%S.%f",
    "ordinal_date": "%Y-%j",
    "ordinal_date_time": "%Y-%jT%H:%M:%S.%f%z",
    "ordinal_date_time_no_millis": "%Y-%jT%H:%M:%S%z",
    "time": "%H:%M:%S.%f%z",
    "time_no_millis": "%H:%M:%S%z",
    "t_time": "T%H:%M:%S.%f%z",
    "t_time_no_millis": "T%H:%M:%S%z",
    "week_date": "%G-W%V-%u",
    "week_date_time": "%G-W%V-%uT%H:%M:%S.%f%z",
    "week_date_time_no_millis": "%G-W%V-%uT%H:%M:%S%z",
    "weekyear_week_day": "%G-W%V-%u",
    "year": "%Y",
    "year_month": "%Y-%m",
    "year_month_day": "%Y-%m-%d",
}

# strptime pattern for every explicitly supported, non-epoch format name.
_ES_DATE_PATTERNS = {
    "strict_date_optional_time": "%Y-%m-%dT%H:%M:%S.%f%z",
    "basic_date": "%Y%m%d",
    # NOTE(review): unlike the other basic_*_time patterns this one has no %z,
    # so a trailing offset is ignored rather than parsed - confirm intended.
    "basic_date_time": "%Y%m%dT%H%M%S.%f",
    "basic_date_time_no_millis": "%Y%m%dT%H%M%S%z",
    "basic_ordinal_date": "%Y%j",
    "basic_ordinal_date_time": "%Y%jT%H%M%S.%f%z",
    "basic_ordinal_date_time_no_millis": "%Y%jT%H%M%S%z",
    "basic_time": "%H%M%S.%f%z",
    "basic_time_no_millis": "%H%M%S%z",
    "basic_t_time": "T%H%M%S.%f%z",
    "basic_t_time_no_millis": "T%H%M%S%z",
    "basic_week_date": "%GW%V%u",
    "basic_week_date_time": "%GW%V%uT%H%M%S.%f%z",
    "basic_week_date_time_no_millis": "%GW%V%uT%H%M%S%z",
}
_ES_DATE_PATTERNS.update(_ES_STRICT_PAIRED_DATE_PATTERNS)
_ES_DATE_PATTERNS.update({"strict_" + name: pattern
                          for name, pattern in _ES_STRICT_PAIRED_DATE_PATTERNS.items()})


def elasticsearch_date_to_pandas_date(value: Union[int, str],
                                      date_format: Union[str, None]) -> pd.Timestamp:
    """
    Parse a value of an Elasticsearch date field with `pandas.to_datetime`,
    choosing the parsing arguments from the Elasticsearch date format name.

    **Date Formats: https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-date-format.html#built-in-date-formats

    Parameters
    ----------
    value: Union[int, str]
        The date value.
    date_format: Union[str, None]
        The Elasticsearch date format (ex. 'epoch_millis', 'epoch_second', etc.),
        or None when the mapping does not specify one.

    Returns
    -------
    datetime: pd.Timestamp
        The result of `pandas.to_datetime` for `value`
        (None when `value` is None and no format is given).

    Raises
    ------
    NotImplementedError
        For the (strict_)weekyear and (strict_)weekyear_week formats, which
        pandas cannot parse.

    Notes
    -----
    From https://www.elastic.co/guide/en/elasticsearch/reference/current/date.html
    Date formats can be customised, but if no format is specified then it uses the default:
        "strict_date_optional_time||epoch_millis"
    Therefore if no format is specified we assume either strict_date_optional_time
    or epoch_millis.

    Any format name that is not explicitly supported triggers a warning and is
    parsed with a plain `pandas.to_datetime(value)`.
    """
    if date_format is None:
        if value is None:
            # int(None) would raise TypeError; let pandas handle the missing value
            return pd.to_datetime(value)
        try:
            epoch_millis = int(value)
        except ValueError:
            # not an integer, so treat it as a date string
            return pd.to_datetime(value)
        # Kept outside the try block so that an out-of-range epoch raises
        # instead of being silently re-parsed as nanoseconds.
        return pd.to_datetime(epoch_millis, unit='ms')

    unit = _ES_EPOCH_UNITS.get(date_format)
    if unit is not None:
        return pd.to_datetime(value, unit=unit)

    if date_format in _ES_UNSUPPORTED_DATE_FORMATS:
        # TODO investigate if there is a way of converting this
        # ValueError: ISO year directive '%G' must be used with the ISO week directive '%V'
        # and a weekday directive '%A', '%a', '%w', or '%u'.
        raise NotImplementedError(
            "{} is not implemented due to lack of support in pandas".format(date_format))

    pattern = _ES_DATE_PATTERNS.get(date_format)
    if pattern is not None:
        if "%f" in pattern:
            # Patterns with fractional seconds are matched non-exactly, so text
            # that follows the matched part is tolerated.
            return pd.to_datetime(value, format=pattern, exact=False)
        return pd.to_datetime(value, format=pattern)

    warnings.warn("The '{}' format is not explicitly supported. "
                  "Using pandas.to_datetime(value) to parse value".format(date_format),
                  Warning)
    # TODO investigate how we could generate this just once for a bulk read.
    return pd.to_datetime(value)

View File

@ -1,5 +1,7 @@
# File called _pytest for PyCharm compatibility
from datetime import datetime
from elasticsearch import Elasticsearch
import numpy as np
import pandas as pd
@ -7,10 +9,53 @@ import eland as ed
from eland.tests.common import ES_TEST_CLIENT
from eland.tests.common import TestData
from eland.tests.common import assert_pandas_eland_frame_equal
from eland.tests.common import assert_pandas_eland_series_equal
class TestDataFrameDateTime(TestData):
times = ["2019-11-26T19:58:15.246+0000",
"1970-01-01T00:00:03.000+0000"]
time_index_name = 'test_time_formats'
@classmethod
def setup_class(cls):
    """ setup any state specific to the execution of the given class (which
    usually contains tests).

    (Re)creates the index named by ``cls.time_index_name`` with one date field
    per Elasticsearch date format, then indexes one document per entry in
    ``cls.times``.
    """
    es = ES_TEST_CLIENT
    index = cls.time_index_name

    # Drop any index left over from an earlier run; 400/404 responses are ignored
    es.indices.delete(index=index, ignore=[400, 404])

    dts = [datetime.strptime(time, "%Y-%m-%dT%H:%M:%S.%f%z")
           for time in cls.times]
    time_formats_docs = [cls.get_time_values_from_datetime(dt)
                         for dt in dts]

    # Each field is named after the date format it is mapped with
    mappings = {'properties': {field_name: {'type': 'date', 'format': field_name}
                               for field_name in time_formats_docs[0]}}
    body = {"mappings": mappings}
    es.indices.create(index=index, body=body)

    for i, time_formats in enumerate(time_formats_docs):
        es.index(index=index, body=time_formats, id=i)
    es.indices.refresh(index=index)
@classmethod
def teardown_class(cls):
    """ teardown any state that was previously setup with a call to
    setup_class: deletes the index named by ``cls.time_index_name``.
    """
    ES_TEST_CLIENT.indices.delete(index=cls.time_index_name)
def test_datetime_to_ms(self):
df = pd.DataFrame(data={'A': np.random.rand(3),
'B': 1,
@ -41,3 +86,179 @@ class TestDataFrameDateTime(TestData):
ed_df_head = ed_df.head()
assert_pandas_eland_frame_equal(df, ed_df_head)
def test_all_formats(self):
    """
    For every entry in ``self.time_formats``, compare the eland column of the
    same name against a pandas Series built by rendering ``self.times`` with
    the entry's pattern and parsing the result back with that same pattern.
    """
    ed_df = ed.read_es(ES_TEST_CLIENT, self.time_index_name)
    source_pattern = "%Y-%m-%dT%H:%M:%S.%f%z"

    for format_name, pattern in self.time_formats.items():
        expected = []
        for raw in self.times:
            rendered = datetime.strptime(raw, source_pattern).strftime(pattern)
            expected.append(pd.to_datetime(rendered, format=pattern))

        ed_series = ed_df[format_name]
        # documents were indexed with ids 0..n-1, which come back as strings
        doc_ids = [str(i) for i in range(len(self.times))]
        pd_series = pd.Series(expected, index=doc_ids, name=format_name)

        assert_pandas_eland_series_equal(pd_series, ed_series)
@staticmethod
def get_time_values_from_datetime(dt: datetime) -> dict:
    """
    Render a datetime in every Elasticsearch built-in date format covered here.

    Parameters
    ----------
    dt: datetime
        The moment to render; its ``%z`` rendering is appended wherever the
        format carries an offset.

    Returns
    -------
    dict
        Maps each format name to ``dt`` rendered in that format. The two epoch
        entries are ints, every other entry is a str.
    """
    tz = dt.strftime("%z")

    def millis(pattern):
        # %f renders six digits of microseconds; dropping the last three
        # leaves millisecond precision
        return dt.strftime(pattern)[:-3]

    time_formats = {
        # round() rather than int(): the float product can land just below the
        # exact millisecond and would otherwise be truncated one too low
        "epoch_millis": round(dt.timestamp() * 1000),
        "epoch_second": int(dt.timestamp()),
        "strict_date_optional_time": millis("%Y-%m-%dT%H:%M:%S.%f") + tz,
        "basic_date": dt.strftime("%Y%m%d"),
        "basic_date_time": millis("%Y%m%dT%H%M%S.%f") + tz,
        "basic_date_time_no_millis": dt.strftime("%Y%m%dT%H%M%S%z"),
        "basic_ordinal_date": dt.strftime("%Y%j"),
        "basic_ordinal_date_time": millis("%Y%jT%H%M%S.%f") + tz,
        "basic_ordinal_date_time_no_millis": dt.strftime("%Y%jT%H%M%S%z"),
        "basic_time": millis("%H%M%S.%f") + tz,
        "basic_time_no_millis": dt.strftime("%H%M%S%z"),
        "basic_t_time": millis("T%H%M%S.%f") + tz,
        "basic_t_time_no_millis": dt.strftime("T%H%M%S%z"),
        "basic_week_date": dt.strftime("%GW%V%u"),
        "basic_week_date_time": millis("%GW%V%uT%H%M%S.%f") + tz,
        "basic_week_date_time_no_millis": dt.strftime("%GW%V%uT%H%M%S%z"),
    }

    # Formats that exist both as "strict_<name>" and "<name>" share one rendering
    paired = [
        ("date", dt.strftime("%Y-%m-%d")),
        ("date_hour", dt.strftime("%Y-%m-%dT%H")),
        ("date_hour_minute", dt.strftime("%Y-%m-%dT%H:%M")),
        ("date_hour_minute_second", dt.strftime("%Y-%m-%dT%H:%M:%S")),
        ("date_hour_minute_second_fraction", millis("%Y-%m-%dT%H:%M:%S.%f")),
        ("date_hour_minute_second_millis", millis("%Y-%m-%dT%H:%M:%S.%f")),
        ("date_time", millis("%Y-%m-%dT%H:%M:%S.%f") + tz),
        ("date_time_no_millis", dt.strftime("%Y-%m-%dT%H:%M:%S%z")),
        ("hour", dt.strftime("%H")),
        ("hour_minute", dt.strftime("%H:%M")),
        ("hour_minute_second", dt.strftime("%H:%M:%S")),
        ("hour_minute_second_fraction", millis("%H:%M:%S.%f")),
        ("hour_minute_second_millis", millis("%H:%M:%S.%f")),
        ("ordinal_date", dt.strftime("%Y-%j")),
        ("ordinal_date_time", millis("%Y-%jT%H:%M:%S.%f") + tz),
        ("ordinal_date_time_no_millis", dt.strftime("%Y-%jT%H:%M:%S%z")),
        ("time", millis("%H:%M:%S.%f") + tz),
        ("time_no_millis", dt.strftime("%H:%M:%S%z")),
        ("t_time", millis("T%H:%M:%S.%f") + tz),
        ("t_time_no_millis", dt.strftime("T%H:%M:%S%z")),
        ("week_date", dt.strftime("%G-W%V-%u")),
        ("week_date_time", millis("%G-W%V-%uT%H:%M:%S.%f") + tz),
        ("week_date_time_no_millis", dt.strftime("%G-W%V-%uT%H:%M:%S%z")),
        ("weekyear", dt.strftime("%G")),
        ("weekyear_week", dt.strftime("%G-W%V")),
        ("weekyear_week_day", dt.strftime("%G-W%V-%u")),
        ("year", dt.strftime("%Y")),
        ("year_month", dt.strftime("%Y-%m")),
        ("year_month_day", dt.strftime("%Y-%m-%d")),
    ]
    for name, rendered in paired:
        time_formats["strict_" + name] = rendered
        time_formats[name] = rendered

    return time_formats
# Maps each date format name (also the name of the matching field in the test
# index) to the strftime/strptime pattern that test_all_formats uses to build
# the expected pandas value for that field.
time_formats = {
# the two epoch fields hold ints in the index; these patterns only describe
# how the expected value is rendered and parsed back on the pandas side
"epoch_millis": "%Y-%m-%dT%H:%M:%S.%f",
"epoch_second": "%Y-%m-%dT%H:%M:%S",
"strict_date_optional_time": "%Y-%m-%dT%H:%M:%S.%f%z",
"basic_date": "%Y%m%d",
"basic_date_time": "%Y%m%dT%H%M%S.%f",
"basic_date_time_no_millis": "%Y%m%dT%H%M%S%z",
"basic_ordinal_date": "%Y%j",
"basic_ordinal_date_time": "%Y%jT%H%M%S.%f%z",
"basic_ordinal_date_time_no_millis": "%Y%jT%H%M%S%z",
"basic_time": "%H%M%S.%f%z",
"basic_time_no_millis": "%H%M%S%z",
"basic_t_time": "T%H%M%S.%f%z",
"basic_t_time_no_millis": "T%H%M%S%z",
"basic_week_date": "%GW%V%u",
"basic_week_date_time": "%GW%V%uT%H%M%S.%f%z",
"basic_week_date_time_no_millis": "%GW%V%uT%H%M%S%z",
"date": "%Y-%m-%d",
"strict_date": "%Y-%m-%d",
"strict_date_hour": "%Y-%m-%dT%H",
"date_hour": "%Y-%m-%dT%H",
"strict_date_hour_minute": "%Y-%m-%dT%H:%M",
"date_hour_minute": "%Y-%m-%dT%H:%M",
"strict_date_hour_minute_second": "%Y-%m-%dT%H:%M:%S",
"date_hour_minute_second": "%Y-%m-%dT%H:%M:%S",
"strict_date_hour_minute_second_fraction": "%Y-%m-%dT%H:%M:%S.%f",
"date_hour_minute_second_fraction": "%Y-%m-%dT%H:%M:%S.%f",
"strict_date_hour_minute_second_millis": "%Y-%m-%dT%H:%M:%S.%f",
"date_hour_minute_second_millis": "%Y-%m-%dT%H:%M:%S.%f",
"strict_date_time": "%Y-%m-%dT%H:%M:%S.%f%z",
"date_time": "%Y-%m-%dT%H:%M:%S.%f%z",
"strict_date_time_no_millis": "%Y-%m-%dT%H:%M:%S%z",
"date_time_no_millis": "%Y-%m-%dT%H:%M:%S%z",
"strict_hour": "%H",
"hour": "%H",
"strict_hour_minute": "%H:%M",
"hour_minute": "%H:%M",
"strict_hour_minute_second": "%H:%M:%S",
"hour_minute_second": "%H:%M:%S",
"strict_hour_minute_second_fraction": "%H:%M:%S.%f",
"hour_minute_second_fraction": "%H:%M:%S.%f",
"strict_hour_minute_second_millis": "%H:%M:%S.%f",
"hour_minute_second_millis": "%H:%M:%S.%f",
"strict_ordinal_date": "%Y-%j",
"ordinal_date": "%Y-%j",
"strict_ordinal_date_time": "%Y-%jT%H:%M:%S.%f%z",
"ordinal_date_time": "%Y-%jT%H:%M:%S.%f%z",
"strict_ordinal_date_time_no_millis": "%Y-%jT%H:%M:%S%z",
"ordinal_date_time_no_millis": "%Y-%jT%H:%M:%S%z",
"strict_time": "%H:%M:%S.%f%z",
"time": "%H:%M:%S.%f%z",
"strict_time_no_millis": "%H:%M:%S%z",
"time_no_millis": "%H:%M:%S%z",
"strict_t_time": "T%H:%M:%S.%f%z",
"t_time": "T%H:%M:%S.%f%z",
"strict_t_time_no_millis": "T%H:%M:%S%z",
"t_time_no_millis": "T%H:%M:%S%z",
"strict_week_date": "%G-W%V-%u",
"week_date": "%G-W%V-%u",
"strict_week_date_time": "%G-W%V-%uT%H:%M:%S.%f%z",
"week_date_time": "%G-W%V-%uT%H:%M:%S.%f%z",
"strict_week_date_time_no_millis": "%G-W%V-%uT%H:%M:%S%z",
"week_date_time_no_millis": "%G-W%V-%uT%H:%M:%S%z",
"strict_weekyear_week_day": "%G-W%V-%u",
"weekyear_week_day": "%G-W%V-%u",
"strict_year": "%Y",
"year": "%Y",
"strict_year_month": "%Y-%m",
"year_month": "%Y-%m",
"strict_year_month_day": "%Y-%m-%d",
"year_month_day": "%Y-%m-%d"
}
# The following formats are excluded because pandas raises a ValueError for them:
#   "strict_weekyear": ("%G", None) - not supported in pandas
#   "strict_weekyear_week": ("%G-W%V", None)
# ValueError: ISO year directive '%G' must be used with the ISO week directive '%V'
# and a weekday directive '%A', '%a', '%w', or '%u'.

View File

@ -51,6 +51,7 @@ def _setup_data(es):
print("Done", index_name)
def _update_max_compilations_limit(es, limit="10000/1m"):
print('Updating script.max_compilations_rate to ', limit)
cluster_client = ClusterClient(es)
@ -79,7 +80,6 @@ if __name__ == '__main__':
# Create connection to Elasticsearch - use defaults
print('Connecting to ES', ELASTICSEARCH_HOST)
es = ES_TEST_CLIENT
_setup_data(es)
_setup_test_mappings(es)