eland/eland/tests/client/test_mappings_pytest.py
2019-06-28 14:43:20 +00:00

91 lines
4.1 KiB
Python

# File called _pytest for PyCharm compatibility
from pandas.util.testing import (
assert_series_equal, assert_frame_equal)
import eland as ed
from eland.tests import *
from eland.tests.common import TestData
class TestMapping(TestData):
    """Tests for ``ed.Mappings`` built from the TEST_MAPPING1 index."""

    # Requires 'setup_tests.py' to be run prior to this

    def test_fields(self):
        """Field names, es dtypes and source-field count equal the expected fixtures."""
        _assert_matches_expected(_load_test_mappings())

    def test_copy(self):
        """Copies restricted to chosen columns report only those columns.

        The original mappings object is re-checked afterwards to confirm it
        still matches the expected fixtures.
        """
        original = _load_test_mappings()
        _assert_matches_expected(original)

        selections = (
            # Pick 1 source field
            ['dest_location'],
            # Pick 3 source fields (out of order)
            ['dest_location', 'city', 'user_name'],
        )
        for selected in selections:
            subset = ed.Mappings(mappings=original, columns=selected)
            assert selected == subset.all_fields()
            assert len(selected) == subset.count_source_fields()

        # Check original is still ok
        _assert_matches_expected(original)

    def test_dtypes(self):
        """``dtypes()`` equals the expected per-field dtype series."""
        mappings = _load_test_mappings()

        # Key order matters: assert_series_equal also compares the index.
        dtype_by_field = {
            'city': 'object',
            'content': 'object',
            'dest_location': 'object',
            'email': 'object',
            'maps-telemetry.attributesPerMap.dataSourcesCount.avg': 'int64',
            'maps-telemetry.attributesPerMap.dataSourcesCount.max': 'int64',
            'maps-telemetry.attributesPerMap.dataSourcesCount.min': 'int64',
            'maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.avg': 'float64',
            'maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.max': 'int64',
            'maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.min': 'int64',
            'my_join_field': 'object',
            'name': 'object',
            'origin_location.lat': 'object',
            'origin_location.lon': 'object',
            'text': 'object',
            'tweeted_at': 'datetime64[ns]',
            'type': 'object',
            'user_name': 'object',
        }
        assert_series_equal(pd.Series(dtype_by_field), mappings.dtypes())

    def test_get_dtype_counts(self):
        """``get_dtype_counts()`` equals the expected count per dtype."""
        mappings = _load_test_mappings()

        count_by_dtype = {
            'datetime64[ns]': 1,
            'float64': 1,
            'int64': 5,
            'object': 11,
        }
        assert_series_equal(pd.Series(count_by_dtype), mappings.get_dtype_counts())

    def test_mapping_capabilities(self):
        """``field_capabilities`` returns the expected entries for 'city' and 'city.raw'."""
        mappings = _load_test_mappings()

        _assert_field_capabilities(mappings, 'city', {
            '_source': True,
            'es_dtype': 'text',
            'pd_dtype': 'object',
            'searchable': True,
            'aggregatable': False,
        })

        _assert_field_capabilities(mappings, 'city.raw', {
            '_source': False,
            'es_dtype': 'keyword',
            'pd_dtype': 'object',
            'searchable': True,
            'aggregatable': True,
        })


def _load_test_mappings():
    """Build ``ed.Mappings`` for TEST_MAPPING1_INDEX_NAME via a client for ELASTICSEARCH_HOST."""
    client = ed.Client(ELASTICSEARCH_HOST)
    return ed.Mappings(client, TEST_MAPPING1_INDEX_NAME)


def _assert_matches_expected(mappings):
    """Assert that ``mappings`` agrees with the TEST_MAPPING1 expected fixtures.

    Checks, in order: the list of all fields, the 'es_dtype' column of
    ``mappings_capabilities`` as a DataFrame, and the source-field count.
    """
    assert TEST_MAPPING1_EXPECTED_DF.index.tolist() == mappings.all_fields()

    es_dtypes = pd.DataFrame(mappings.mappings_capabilities['es_dtype'])
    assert_frame_equal(TEST_MAPPING1_EXPECTED_DF, es_dtypes)

    assert TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT == mappings.count_source_fields()


def _assert_field_capabilities(mappings, field_name, expected):
    """Assert each ``expected`` key/value equals ``mappings.field_capabilities(field_name)[key]``."""
    capabilities = mappings.field_capabilities(field_name)
    for key, value in expected.items():
        # Plain equality with the expected value on the left, as in the original assertions.
        assert value == capabilities[key]