mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Added support for 2 date formats: (#70)
* Adds support for multiple date formats
This commit is contained in:
parent
13141645f7
commit
99bfea42b6
@ -50,15 +50,18 @@ class Mappings:
|
||||
mappings: Mappings
|
||||
Object to copy
|
||||
"""
|
||||
|
||||
# here we keep track of the format of any date fields
|
||||
self._date_fields_format = {}
|
||||
if (client is not None) and (index_pattern is not None):
|
||||
get_mapping = client.get_mapping(index=index_pattern)
|
||||
|
||||
# Get all fields (including all nested) and then all field_caps
|
||||
all_fields = Mappings._extract_fields_from_mapping(get_mapping)
|
||||
all_fields, self._date_fields_format = Mappings._extract_fields_from_mapping(get_mapping)
|
||||
all_fields_caps = client.field_caps(index=index_pattern, fields='*')
|
||||
|
||||
# Get top level (not sub-field multifield) mappings
|
||||
source_fields = Mappings._extract_fields_from_mapping(get_mapping, source_only=True)
|
||||
source_fields, _ = Mappings._extract_fields_from_mapping(get_mapping, source_only=True)
|
||||
|
||||
# Populate capability matrix of fields
|
||||
# field_name, es_dtype, pd_dtype, is_searchable, is_aggregatable, is_source
|
||||
@ -76,7 +79,7 @@ class Mappings:
|
||||
self._source_field_pd_dtypes[field_name] = pd_dtype
|
||||
|
||||
@staticmethod
|
||||
def _extract_fields_from_mapping(mappings, source_only=False):
|
||||
def _extract_fields_from_mapping(mappings, source_only=False, date_format=None):
|
||||
"""
|
||||
Extract all field names and types from a mapping.
|
||||
```
|
||||
@ -118,11 +121,14 @@ class Mappings:
|
||||
|
||||
Returns
|
||||
-------
|
||||
fields: dict
|
||||
Dict of field names and types
|
||||
fields, dates_format: tuple(dict, dict)
|
||||
where:
|
||||
fields: Dict of field names and types
|
||||
dates_format: Dict of date field names and format
|
||||
|
||||
"""
|
||||
fields = {}
|
||||
dates_format = {}
|
||||
|
||||
# Recurse until we get a 'type: xxx'
|
||||
def flatten(x, name=''):
|
||||
@ -131,7 +137,9 @@ class Mappings:
|
||||
if a == 'type' and type(x[a]) is str: # 'type' can be a name of a field
|
||||
field_name = name[:-1]
|
||||
field_type = x[a]
|
||||
|
||||
# if field_type is 'date' keep track of the format info when available
|
||||
if field_type == "date" and "format" in x:
|
||||
dates_format[field_name] = x["format"]
|
||||
# If there is a conflicting type, warn - first values added wins
|
||||
if field_name in fields and fields[field_name] != field_type:
|
||||
warnings.warn("Field {} has conflicting types {} != {}".
|
||||
@ -150,7 +158,7 @@ class Mappings:
|
||||
|
||||
flatten(properties)
|
||||
|
||||
return fields
|
||||
return fields, dates_format
|
||||
|
||||
@staticmethod
|
||||
def _create_capability_matrix(all_fields, source_fields, all_fields_caps):
|
||||
@ -367,6 +375,19 @@ class Mappings:
|
||||
"""
|
||||
return self._mappings_capabilities.loc[field_name]
|
||||
|
||||
def get_date_field_format(self, field_name):
|
||||
"""
|
||||
Parameters
|
||||
----------
|
||||
field_name: str
|
||||
|
||||
Returns
|
||||
-------
|
||||
str or None
|
||||
The Elasticsearch date format recorded for field_name in the mapping, or None if no format was recorded for that field
|
||||
"""
|
||||
return self._date_fields_format.get(field_name)
|
||||
|
||||
def source_field_pd_dtype(self, field_name):
|
||||
"""
|
||||
Parameters
|
||||
|
@ -1,3 +1,6 @@
|
||||
import warnings
|
||||
from typing import Union
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
@ -282,14 +285,10 @@ class ElandQueryCompiler:
|
||||
|
||||
# Coerce types - for now just datetime
|
||||
if pd_dtype == 'datetime64[ns]':
|
||||
# TODO - this doesn't work for certain ES date formats
|
||||
# e.g. "@timestamp" : {
|
||||
# "type" : "date",
|
||||
# "format" : "epoch_millis"
|
||||
# }
|
||||
# 1484053499256 - we need to check ES type and format and add conversions like:
|
||||
# pd.to_datetime(x, unit='ms')
|
||||
x = pd.to_datetime(x)
|
||||
x = elasticsearch_date_to_pandas_date(
|
||||
x,
|
||||
self._mappings.get_date_field_format(field_name)
|
||||
)
|
||||
|
||||
# Elasticsearch can have multiple values for a field. These are represented as lists, so
|
||||
# create lists for this pivot (see notes above)
|
||||
@ -648,3 +647,195 @@ class ElandQueryCompiler:
|
||||
field_to_display_names=self._field_to_display_names.copy(),
|
||||
display_to_field_names=self._display_to_field_names.copy()
|
||||
)
|
||||
|
||||
|
||||
# Elasticsearch epoch formats -> pandas ``unit`` argument.
_ES_EPOCH_UNITS = {
    "epoch_millis": "ms",
    "epoch_second": "s",
}

# Elasticsearch formats that this module cannot convert: pandas rejects '%G'
# unless it is combined with '%V' and a weekday directive.
_ES_UNSUPPORTED_DATE_FORMATS = frozenset({
    "strict_weekyear",
    "weekyear",
    "strict_weekyear_week",
    "weekyear_week",
})

# Formats that exist as both "<name>" and "strict_<name>" and are parsed the
# same way here. Each value is (strptime pattern, ``exact`` flag for pandas).
# ``exact=False`` is used for patterns containing fractional seconds.
_PAIRED_ES_DATE_FORMATS = {
    "date": ("%Y-%m-%d", True),
    "date_hour": ("%Y-%m-%dT%H", True),
    "date_hour_minute": ("%Y-%m-%dT%H:%M", True),
    "date_hour_minute_second": ("%Y-%m-%dT%H:%M:%S", True),
    "date_hour_minute_second_fraction": ("%Y-%m-%dT%H:%M:%S.%f", False),
    "date_hour_minute_second_millis": ("%Y-%m-%dT%H:%M:%S.%f", False),
    "date_time": ("%Y-%m-%dT%H:%M:%S.%f%z", False),
    "date_time_no_millis": ("%Y-%m-%dT%H:%M:%S%z", True),
    "hour": ("%H", True),
    "hour_minute": ("%H:%M", True),
    "hour_minute_second": ("%H:%M:%S", True),
    "hour_minute_second_fraction": ("%H:%M:%S.%f", False),
    "hour_minute_second_millis": ("%H:%M:%S.%f", False),
    "ordinal_date": ("%Y-%j", True),
    "ordinal_date_time": ("%Y-%jT%H:%M:%S.%f%z", False),
    "ordinal_date_time_no_millis": ("%Y-%jT%H:%M:%S%z", True),
    "time": ("%H:%M:%S.%f%z", False),
    "time_no_millis": ("%H:%M:%S%z", True),
    "t_time": ("T%H:%M:%S.%f%z", False),
    "t_time_no_millis": ("T%H:%M:%S%z", True),
    "week_date": ("%G-W%V-%u", True),
    "week_date_time": ("%G-W%V-%uT%H:%M:%S.%f%z", False),
    "week_date_time_no_millis": ("%G-W%V-%uT%H:%M:%S%z", True),
    "weekyear_week_day": ("%G-W%V-%u", True),
    "year": ("%Y", True),
    "year_month": ("%Y-%m", True),
    "year_month_day": ("%Y-%m-%d", True),
}

# Complete lookup: formats without a lenient/strict pair, plus both spellings
# of every paired format.
_ES_DATE_FORMATS = {
    "strict_date_optional_time": ("%Y-%m-%dT%H:%M:%S.%f%z", False),
    "basic_date": ("%Y%m%d", True),
    "basic_date_time": ("%Y%m%dT%H%M%S.%f", False),
    "basic_date_time_no_millis": ("%Y%m%dT%H%M%S%z", True),
    "basic_ordinal_date": ("%Y%j", True),
    "basic_ordinal_date_time": ("%Y%jT%H%M%S.%f%z", False),
    "basic_ordinal_date_time_no_millis": ("%Y%jT%H%M%S%z", True),
    "basic_time": ("%H%M%S.%f%z", False),
    "basic_time_no_millis": ("%H%M%S%z", True),
    "basic_t_time": ("T%H%M%S.%f%z", False),
    "basic_t_time_no_millis": ("T%H%M%S%z", True),
    "basic_week_date": ("%GW%V%u", True),
    "basic_week_date_time": ("%GW%V%uT%H%M%S.%f%z", False),
    "basic_week_date_time_no_millis": ("%GW%V%uT%H%M%S%z", True),
}
_ES_DATE_FORMATS.update(_PAIRED_ES_DATE_FORMATS)
_ES_DATE_FORMATS.update(
    ("strict_" + name, spec) for name, spec in _PAIRED_ES_DATE_FORMATS.items()
)


def elasticsearch_date_to_pandas_date(value: Union[int, str], date_format: Union[str, None]) -> pd.Timestamp:
    """
    Convert a value of an Elasticsearch date field into a pandas datetime,
    using the Elasticsearch format declared for that field.

    **Date Formats: https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-date-format.html#built-in-date-formats

    Parameters
    ----------
    value: Union[int, str]
        The date value.
    date_format: str or None
        The Elasticsearch date format (ex. 'epoch_millis', 'epoch_second', etc.),
        or None if the mapping does not specify one.

    Returns
    -------
    datetime: pd.Timestamp
        From https://www.elastic.co/guide/en/elasticsearch/reference/current/date.html
        Date formats can be customised, but if no format is specified then it uses the default:
        "strict_date_optional_time||epoch_millis"
        Therefore if no format is specified we assume either strict_date_optional_time
        or epoch_millis.

    Raises
    ------
    NotImplementedError
        For the (strict_)weekyear and (strict_)weekyear_week formats, which
        pandas cannot parse.

    Warns
    -----
    Warning
        If `date_format` is not one of the formats handled explicitly; the
        value is then parsed with ``pandas.to_datetime(value)``.
    """
    if date_format is None:
        # Only the int() conversion is guarded: a value that is numeric but out
        # of range as epoch milliseconds must raise rather than fall through
        # and be silently re-read by pandas as nanoseconds.
        try:
            epoch_millis = int(value)
        except ValueError:
            return pd.to_datetime(value)
        return pd.to_datetime(epoch_millis, unit='ms')

    epoch_unit = _ES_EPOCH_UNITS.get(date_format)
    if epoch_unit is not None:
        return pd.to_datetime(value, unit=epoch_unit)

    known_format = _ES_DATE_FORMATS.get(date_format)
    if known_format is not None:
        strptime_format, exact = known_format
        return pd.to_datetime(value, format=strptime_format, exact=exact)

    if date_format in _ES_UNSUPPORTED_DATE_FORMATS:
        # TODO investigate if there is a way of converting this
        # Not supported in pandas
        # ValueError: ISO year directive '%G' must be used with the ISO week directive '%V'
        # and a weekday directive '%A', '%a', '%w', or '%u'.
        raise NotImplementedError(
            "{} is not implemented due to lack of support in pandas".format(date_format)
        )

    warnings.warn("The '{}' format is not explicitly supported. "
                  "Using pandas.to_datetime(value) to parse value".format(date_format),
                  Warning)
    # TODO investigate how we could generate this just once for a bulk read.
    return pd.to_datetime(value)
|
||||
|
@ -1,5 +1,7 @@
|
||||
# File called _pytest for PyCharm compatibility
|
||||
from datetime import datetime
|
||||
|
||||
from elasticsearch import Elasticsearch
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
@ -7,10 +9,53 @@ import eland as ed
|
||||
from eland.tests.common import ES_TEST_CLIENT
|
||||
from eland.tests.common import TestData
|
||||
from eland.tests.common import assert_pandas_eland_frame_equal
|
||||
from eland.tests.common import assert_pandas_eland_series_equal
|
||||
|
||||
|
||||
class TestDataFrameDateTime(TestData):
|
||||
|
||||
times = ["2019-11-26T19:58:15.246+0000",
|
||||
"1970-01-01T00:00:03.000+0000"]
|
||||
time_index_name = 'test_time_formats'
|
||||
|
||||
@classmethod
def setup_class(cls):
    """ setup any state specific to the execution of the given class (which
    usually contains tests).

    Recreates the index named by ``cls.time_index_name`` with one date field
    per format (each field is mapped with the format it is named after) and
    indexes one document per entry in ``cls.times``.
    """
    es = ES_TEST_CLIENT
    # Use the class attribute everywhere so setup and teardown_class cannot
    # drift apart on the index name.
    index = cls.time_index_name

    # Drop any index left over from a previous run; same ignore list as the
    # original call, so a missing index is tolerated.
    es.indices.delete(index=index, ignore=[400, 404])

    dts = [datetime.strptime(time, "%Y-%m-%dT%H:%M:%S.%f%z")
           for time in cls.times]
    time_formats_docs = [cls.get_time_values_from_datetime(dt)
                         for dt in dts]

    # Every document has the same keys, so the first one defines the mapping.
    mappings = {
        'properties': {
            field_name: {'type': 'date', 'format': field_name}
            for field_name in time_formats_docs[0]
        }
    }
    es.indices.create(index=index, body={"mappings": mappings})

    for i, time_formats in enumerate(time_formats_docs):
        es.index(index=index, body=time_formats, id=i)
    es.indices.refresh(index=index)
|
||||
|
||||
@classmethod
def teardown_class(cls):
    """ teardown any state that was previously setup with a call to
    setup_class: deletes the index named by ``cls.time_index_name``.
    """
    ES_TEST_CLIENT.indices.delete(index=cls.time_index_name)
|
||||
|
||||
def test_datetime_to_ms(self):
|
||||
df = pd.DataFrame(data={'A': np.random.rand(3),
|
||||
'B': 1,
|
||||
@ -41,3 +86,179 @@ class TestDataFrameDateTime(TestData):
|
||||
ed_df_head = ed_df.head()
|
||||
|
||||
assert_pandas_eland_frame_equal(df, ed_df_head)
|
||||
|
||||
def test_all_formats(self):
    """Check each date-format column read through eland against pandas.

    For every entry in ``self.time_formats`` the expected values are built by
    formatting each reference time with the entry's strptime pattern and
    parsing it back with pandas, then compared with the eland series of the
    same name.
    """
    ed_df = ed.read_es(ES_TEST_CLIENT, self.time_index_name)
    # setup_class indexes the documents with ids 0..n-1, read back as strings
    doc_ids = [str(i) for i in range(len(self.times))]

    for format_name, strptime_format in self.time_formats.items():
        expected = []
        for raw_time in self.times:
            reference = datetime.strptime(raw_time, "%Y-%m-%dT%H:%M:%S.%f%z")
            expected.append(pd.to_datetime(reference.strftime(strptime_format),
                                           format=strptime_format))

        pd_series = pd.Series(expected, index=doc_ids, name=format_name)
        ed_series = ed_df[format_name]

        assert_pandas_eland_series_equal(pd_series, ed_series)
|
||||
|
||||
@staticmethod
def get_time_values_from_datetime(dt: datetime) -> dict:
    """Render `dt` once per date format name.

    Parameters
    ----------
    dt: datetime
        The instant to render.

    Returns
    -------
    dict
        Maps each format name to `dt` rendered for that format: integers for
        'epoch_millis' and 'epoch_second', strings otherwise. Fractional
        seconds are cut to three digits (milliseconds).
    """

    def render(fmt, millis=False, zone=False):
        # '%f' yields six digits; dropping the last three leaves milliseconds
        text = dt.strftime(fmt + ".%f")[:-3] if millis else dt.strftime(fmt)
        if zone:
            text += dt.strftime("%z")
        return text

    # Integer arithmetic instead of int(dt.timestamp() * 1000): the float
    # product can land just below the true value and lose a millisecond.
    whole_seconds = int(dt.replace(microsecond=0).timestamp())

    time_formats = {
        "epoch_millis": whole_seconds * 1000 + dt.microsecond // 1000,
        "epoch_second": whole_seconds,
        "strict_date_optional_time": render("%Y-%m-%dT%H:%M:%S", millis=True, zone=True),
        "basic_date": render("%Y%m%d"),
        "basic_date_time": render("%Y%m%dT%H%M%S", millis=True, zone=True),
        "basic_date_time_no_millis": render("%Y%m%dT%H%M%S", zone=True),
        "basic_ordinal_date": render("%Y%j"),
        "basic_ordinal_date_time": render("%Y%jT%H%M%S", millis=True, zone=True),
        "basic_ordinal_date_time_no_millis": render("%Y%jT%H%M%S", zone=True),
        "basic_time": render("%H%M%S", millis=True, zone=True),
        "basic_time_no_millis": render("%H%M%S", zone=True),
        "basic_t_time": render("T%H%M%S", millis=True, zone=True),
        "basic_t_time_no_millis": render("T%H%M%S", zone=True),
        "basic_week_date": render("%GW%V%u"),
        "basic_week_date_time": render("%GW%V%uT%H%M%S", millis=True, zone=True),
        "basic_week_date_time_no_millis": render("%GW%V%uT%H%M%S", zone=True),
    }

    # Formats with a lenient and a "strict_" spelling share the same text.
    paired = [
        ("date", render("%Y-%m-%d")),
        ("date_hour", render("%Y-%m-%dT%H")),
        ("date_hour_minute", render("%Y-%m-%dT%H:%M")),
        ("date_hour_minute_second", render("%Y-%m-%dT%H:%M:%S")),
        ("date_hour_minute_second_fraction", render("%Y-%m-%dT%H:%M:%S", millis=True)),
        ("date_hour_minute_second_millis", render("%Y-%m-%dT%H:%M:%S", millis=True)),
        ("date_time", render("%Y-%m-%dT%H:%M:%S", millis=True, zone=True)),
        ("date_time_no_millis", render("%Y-%m-%dT%H:%M:%S", zone=True)),
        ("hour", render("%H")),
        ("hour_minute", render("%H:%M")),
        ("hour_minute_second", render("%H:%M:%S")),
        ("hour_minute_second_fraction", render("%H:%M:%S", millis=True)),
        ("hour_minute_second_millis", render("%H:%M:%S", millis=True)),
        ("ordinal_date", render("%Y-%j")),
        ("ordinal_date_time", render("%Y-%jT%H:%M:%S", millis=True, zone=True)),
        ("ordinal_date_time_no_millis", render("%Y-%jT%H:%M:%S", zone=True)),
        ("time", render("%H:%M:%S", millis=True, zone=True)),
        ("time_no_millis", render("%H:%M:%S", zone=True)),
        ("t_time", render("T%H:%M:%S", millis=True, zone=True)),
        ("t_time_no_millis", render("T%H:%M:%S", zone=True)),
        ("week_date", render("%G-W%V-%u")),
        ("week_date_time", render("%G-W%V-%uT%H:%M:%S", millis=True, zone=True)),
        ("week_date_time_no_millis", render("%G-W%V-%uT%H:%M:%S", zone=True)),
        ("weekyear", render("%G")),
        ("weekyear_week", render("%G-W%V")),
        ("weekyear_week_day", render("%G-W%V-%u")),
        ("year", render("%Y")),
        ("year_month", render("%Y-%m")),
        ("year_month_day", render("%Y-%m-%d")),
    ]
    for name, text in paired:
        time_formats["strict_" + name] = text
        time_formats[name] = text

    return time_formats
|
||||
|
||||
time_formats = {
|
||||
"epoch_millis": "%Y-%m-%dT%H:%M:%S.%f",
|
||||
"epoch_second": "%Y-%m-%dT%H:%M:%S",
|
||||
"strict_date_optional_time": "%Y-%m-%dT%H:%M:%S.%f%z",
|
||||
"basic_date": "%Y%m%d",
|
||||
"basic_date_time": "%Y%m%dT%H%M%S.%f",
|
||||
"basic_date_time_no_millis": "%Y%m%dT%H%M%S%z",
|
||||
"basic_ordinal_date": "%Y%j",
|
||||
"basic_ordinal_date_time": "%Y%jT%H%M%S.%f%z",
|
||||
"basic_ordinal_date_time_no_millis": "%Y%jT%H%M%S%z",
|
||||
"basic_time": "%H%M%S.%f%z",
|
||||
"basic_time_no_millis": "%H%M%S%z",
|
||||
"basic_t_time": "T%H%M%S.%f%z",
|
||||
"basic_t_time_no_millis": "T%H%M%S%z",
|
||||
"basic_week_date": "%GW%V%u",
|
||||
"basic_week_date_time": "%GW%V%uT%H%M%S.%f%z",
|
||||
"basic_week_date_time_no_millis": "%GW%V%uT%H%M%S%z",
|
||||
"date": "%Y-%m-%d",
|
||||
"strict_date": "%Y-%m-%d",
|
||||
"strict_date_hour": "%Y-%m-%dT%H",
|
||||
"date_hour": "%Y-%m-%dT%H",
|
||||
"strict_date_hour_minute": "%Y-%m-%dT%H:%M",
|
||||
"date_hour_minute": "%Y-%m-%dT%H:%M",
|
||||
"strict_date_hour_minute_second": "%Y-%m-%dT%H:%M:%S",
|
||||
"date_hour_minute_second": "%Y-%m-%dT%H:%M:%S",
|
||||
"strict_date_hour_minute_second_fraction": "%Y-%m-%dT%H:%M:%S.%f",
|
||||
"date_hour_minute_second_fraction": "%Y-%m-%dT%H:%M:%S.%f",
|
||||
"strict_date_hour_minute_second_millis": "%Y-%m-%dT%H:%M:%S.%f",
|
||||
"date_hour_minute_second_millis": "%Y-%m-%dT%H:%M:%S.%f",
|
||||
"strict_date_time": "%Y-%m-%dT%H:%M:%S.%f%z",
|
||||
"date_time": "%Y-%m-%dT%H:%M:%S.%f%z",
|
||||
"strict_date_time_no_millis": "%Y-%m-%dT%H:%M:%S%z",
|
||||
"date_time_no_millis": "%Y-%m-%dT%H:%M:%S%z",
|
||||
"strict_hour": "%H",
|
||||
"hour": "%H",
|
||||
"strict_hour_minute": "%H:%M",
|
||||
"hour_minute": "%H:%M",
|
||||
"strict_hour_minute_second": "%H:%M:%S",
|
||||
"hour_minute_second": "%H:%M:%S",
|
||||
"strict_hour_minute_second_fraction": "%H:%M:%S.%f",
|
||||
"hour_minute_second_fraction": "%H:%M:%S.%f",
|
||||
"strict_hour_minute_second_millis": "%H:%M:%S.%f",
|
||||
"hour_minute_second_millis": "%H:%M:%S.%f",
|
||||
"strict_ordinal_date": "%Y-%j",
|
||||
"ordinal_date": "%Y-%j",
|
||||
"strict_ordinal_date_time": "%Y-%jT%H:%M:%S.%f%z",
|
||||
"ordinal_date_time": "%Y-%jT%H:%M:%S.%f%z",
|
||||
"strict_ordinal_date_time_no_millis": "%Y-%jT%H:%M:%S%z",
|
||||
"ordinal_date_time_no_millis": "%Y-%jT%H:%M:%S%z",
|
||||
"strict_time": "%H:%M:%S.%f%z",
|
||||
"time": "%H:%M:%S.%f%z",
|
||||
"strict_time_no_millis": "%H:%M:%S%z",
|
||||
"time_no_millis": "%H:%M:%S%z",
|
||||
"strict_t_time": "T%H:%M:%S.%f%z",
|
||||
"t_time": "T%H:%M:%S.%f%z",
|
||||
"strict_t_time_no_millis": "T%H:%M:%S%z",
|
||||
"t_time_no_millis": "T%H:%M:%S%z",
|
||||
"strict_week_date": "%G-W%V-%u",
|
||||
"week_date": "%G-W%V-%u",
|
||||
"strict_week_date_time": "%G-W%V-%uT%H:%M:%S.%f%z",
|
||||
"week_date_time": "%G-W%V-%uT%H:%M:%S.%f%z",
|
||||
"strict_week_date_time_no_millis": "%G-W%V-%uT%H:%M:%S%z",
|
||||
"week_date_time_no_millis": "%G-W%V-%uT%H:%M:%S%z",
|
||||
"strict_weekyear_week_day": "%G-W%V-%u",
|
||||
"weekyear_week_day": "%G-W%V-%u",
|
||||
"strict_year": "%Y",
|
||||
"year": "%Y",
|
||||
"strict_year_month": "%Y-%m",
|
||||
"year_month": "%Y-%m",
|
||||
"strict_year_month_day": "%Y-%m-%d",
|
||||
"year_month_day": "%Y-%m-%d"
|
||||
}
|
||||
|
||||
# excluding these formats as pandas throws a ValueError
|
||||
# "strict_weekyear": ("%G", None) - not supported in pandas
|
||||
# "strict_weekyear_week": ("%G-W%V", None),
|
||||
# E ValueError: ISO year directive '%G' must be used with the ISO week directive '%V' and a weekday directive '%A', '%a', '%w', or '%u'.
|
||||
|
@ -51,6 +51,7 @@ def _setup_data(es):
|
||||
|
||||
print("Done", index_name)
|
||||
|
||||
|
||||
def _update_max_compilations_limit(es, limit="10000/1m"):
|
||||
print('Updating script.max_compilations_rate to ', limit)
|
||||
cluster_client = ClusterClient(es)
|
||||
@ -79,7 +80,6 @@ if __name__ == '__main__':
|
||||
# Create connection to Elasticsearch - use defaults
|
||||
print('Connecting to ES', ELASTICSEARCH_HOST)
|
||||
es = ES_TEST_CLIENT
|
||||
|
||||
|
||||
_setup_data(es)
|
||||
_setup_test_mappings(es)
|
||||
|
Loading…
x
Reference in New Issue
Block a user