Mirror of https://github.com/elastic/eland.git (synced 2025-07-11 00:02:14 +08:00)
Feature/arithmetic ops (#102)
* Adding Python 3.5 compatibility. The main issue is the ordering of dictionaries.
* Updating notebooks with 3.7 results.
* Removing temporary code.
* Defaulting to OrderedDict for Python 3.5 + linting all code. All code reformatted by PyCharm and inspection results analysed.
* Adding support for multiple arithmetic operations. Added a new 'arithmetics' file to manage this process. More tests to be added + cleanup.
* Significant refactor to arithmetics and mappings. Work in progress. Tests don't pass.
* Major refactor to Mappings. Field name mappings were stored in different places (Mappings, QueryCompiler, Operations) and needed to be kept in sync. With the addition of complex arithmetic operations this became complex and difficult to maintain. Therefore, all field naming now lives in 'FieldMappings', which replaces 'Mappings'. Note this commit removes the cache for some of the mapped values, so the code is SIGNIFICANTLY slower on large indices. In addition, the date_format handling added to Mappings has been removed, as it added unnecessary complexity.
* Adding OrderedDict for 3.5 compatibility.
* Fixes to ordering issues with 3.5.
This commit is contained in:
parent
617583183f
commit
efe21a6d87
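For context, a minimal usage sketch of the kind of column arithmetic this commit enables. This is not part of the diff; the 'localhost' cluster and the 'flights' index with its numeric fields are illustrative assumptions.

# Sketch only: assumes a local Elasticsearch cluster containing the Kibana
# 'flights' sample index with numeric fields 'DistanceKilometers' and 'FlightTimeMin'.
import eland as ed

df = ed.DataFrame('localhost', 'flights')

# Column arithmetic is recorded as ArithmeticTask objects and resolved into a
# single Painless script_field that Elasticsearch evaluates server-side.
km_per_minute = df['DistanceKilometers'] / df['FlightTimeMin']
print(km_per_minute.head())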
@@ -71,7 +71,8 @@ except ImportError:
 
 extlinks = {
     'pandas_api_docs': ('https://pandas.pydata.org/pandas-docs/version/0.25.3/reference/api/%s.html', ''),
-    'pandas_user_guide': ('https://pandas.pydata.org/pandas-docs/version/0.25.3/user_guide/%s.html', 'Pandas User Guide/'),
+    'pandas_user_guide': (
+        'https://pandas.pydata.org/pandas-docs/version/0.25.3/user_guide/%s.html', 'Pandas User Guide/'),
     'es_api_docs': ('https://www.elastic.co/guide/en/elasticsearch/reference/current/%s.html', '')
 }
 
@@ -106,3 +107,6 @@ html_theme = "pandas_sphinx_theme"
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 # html_static_path = ['_static']
+
+html_logo = "logo/eland.png"
+html_favicon = "logo/eland_favicon.png"
|
BIN
docs/source/logo/eland.png
Normal file
BIN
docs/source/logo/eland.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 21 KiB |
BIN
docs/source/logo/eland_favicon.png
Normal file
BIN
docs/source/logo/eland_favicon.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 14 KiB |
@@ -18,7 +18,7 @@ from eland.common import *
 from eland.client import *
 from eland.filter import *
 from eland.index import *
-from eland.mappings import *
+from eland.field_mappings import *
 from eland.query import *
 from eland.operations import *
 from eland.query_compiler import *
eland/arithmetics.py (new file, 215 lines)
@@ -0,0 +1,215 @@
# Copyright 2019 Elasticsearch BV
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from abc import ABC, abstractmethod
from io import StringIO

import numpy as np


class ArithmeticObject(ABC):
    @property
    @abstractmethod
    def value(self):
        pass

    @abstractmethod
    def dtype(self):
        pass

    @abstractmethod
    def resolve(self):
        pass

    @abstractmethod
    def __repr__(self):
        pass


class ArithmeticString(ArithmeticObject):
    def __init__(self, value):
        self._value = value

    def resolve(self):
        return self.value

    @property
    def dtype(self):
        return np.dtype(object)

    @property
    def value(self):
        return "'{}'".format(self._value)

    def __repr__(self):
        return self.value


class ArithmeticNumber(ArithmeticObject):
    def __init__(self, value, dtype):
        self._value = value
        self._dtype = dtype

    def resolve(self):
        return self.value

    @property
    def value(self):
        return "{}".format(self._value)

    @property
    def dtype(self):
        return self._dtype

    def __repr__(self):
        return self.value


class ArithmeticSeries(ArithmeticObject):
    def __init__(self, query_compiler, display_name, dtype):
        task = query_compiler.get_arithmetic_op_fields()

        if task is not None:
            self._value = task._arithmetic_series.value
            self._tasks = task._arithmetic_series._tasks.copy()
            self._dtype = dtype
        else:
            aggregatable_field_name = query_compiler.display_name_to_aggregatable_name(display_name)
            if aggregatable_field_name is None:
                raise ValueError(
                    "Can not perform arithmetic operations on non aggregatable fields"
                    "{} is not aggregatable.".format(display_name)
                )

            self._value = "doc['{}'].value".format(aggregatable_field_name)
            self._tasks = []
            self._dtype = dtype

    @property
    def value(self):
        return self._value

    @property
    def dtype(self):
        return self._dtype

    def __repr__(self):
        buf = StringIO()
        buf.write("Series: {} ".format(self.value))
        buf.write("Tasks: ")
        for task in self._tasks:
            buf.write("{} ".format(repr(task)))
        return buf.getvalue()

    def resolve(self):
        value = self._value

        for task in self._tasks:
            if task.op_name == '__add__':
                value = "({} + {})".format(value, task.object.resolve())
            elif task.op_name == '__truediv__':
                value = "({} / {})".format(value, task.object.resolve())
            elif task.op_name == '__floordiv__':
                value = "Math.floor({} / {})".format(value, task.object.resolve())
            elif task.op_name == '__mod__':
                value = "({} % {})".format(value, task.object.resolve())
            elif task.op_name == '__mul__':
                value = "({} * {})".format(value, task.object.resolve())
            elif task.op_name == '__pow__':
                value = "Math.pow({}, {})".format(value, task.object.resolve())
            elif task.op_name == '__sub__':
                value = "({} - {})".format(value, task.object.resolve())
            elif task.op_name == '__radd__':
                value = "({} + {})".format(task.object.resolve(), value)
            elif task.op_name == '__rtruediv__':
                value = "({} / {})".format(task.object.resolve(), value)
            elif task.op_name == '__rfloordiv__':
                value = "Math.floor({} / {})".format(task.object.resolve(), value)
            elif task.op_name == '__rmod__':
                value = "({} % {})".format(task.object.resolve(), value)
            elif task.op_name == '__rmul__':
                value = "({} * {})".format(task.object.resolve(), value)
            elif task.op_name == '__rpow__':
                value = "Math.pow({}, {})".format(task.object.resolve(), value)
            elif task.op_name == '__rsub__':
                value = "({} - {})".format(task.object.resolve(), value)

        return value

    def arithmetic_operation(self, op_name, right):
        # check if operation is supported (raises on unsupported)
        self.check_is_supported(op_name, right)

        task = ArithmeticTask(op_name, right)
        self._tasks.append(task)
        return self

    def check_is_supported(self, op_name, right):
        # supported set is
        # series.number op_name number (all ops)
        # series.string op_name string (only add)
        # series.string op_name int (only mul)
        # series.string op_name float (none)
        # series.int op_name string (none)
        # series.float op_name string (none)

        # see end of https://pandas.pydata.org/pandas-docs/stable/getting_started/basics.html?highlight=dtype for
        # dtype heirarchy
        if np.issubdtype(self.dtype, np.number) and np.issubdtype(right.dtype, np.number):
            # series.number op_name number (all ops)
            return True
        elif np.issubdtype(self.dtype, np.object_) and np.issubdtype(right.dtype, np.object_):
            # series.string op_name string (only add)
            if op_name == '__add__' or op_name == '__radd__':
                return True
        elif np.issubdtype(self.dtype, np.object_) and np.issubdtype(right.dtype, np.integer):
            # series.string op_name int (only mul)
            if op_name == '__mul__':
                return True

        raise TypeError(
            "Arithmetic operation on incompatible types {} {} {}".format(self.dtype, op_name, right.dtype))


class ArithmeticTask:
    def __init__(self, op_name, object):
        self._op_name = op_name

        if not isinstance(object, ArithmeticObject):
            raise TypeError("Task requires ArithmeticObject not {}".format(type(object)))
        self._object = object

    def __repr__(self):
        buf = StringIO()
        buf.write("op_name: {} ".format(self.op_name))
        buf.write("object: {} ".format(repr(self.object)))
        return buf.getvalue()

    @property
    def op_name(self):
        return self._op_name

    @property
    def object(self):
        return self._object
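A small sketch (not part of the new file) of how these building blocks render Painless fragments. The composed expression in the comment uses assumed field names.

# Sketch only: ArithmeticNumber/ArithmeticString render themselves as script
# fragments; ArithmeticSeries chains them through ArithmeticTask objects into
# expressions such as "(doc['taxful_total_price'].value / doc['total_quantity'].value)"
# (field names here are illustrative).
import numpy as np

from eland.arithmetics import ArithmeticNumber, ArithmeticString

half = ArithmeticNumber(0.5, np.dtype('float64'))
print(half.resolve())    # "0.5"

label = ArithmeticString('metric')
print(label.resolve())   # "'metric'" (quoted so it can be embedded in a script)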
@@ -118,7 +118,7 @@ class DataFrame(NDFrame):
     There are effectively 2 constructors:
 
     1. client, index_pattern, columns, index_field
-    2. query_compiler (eland.ElandQueryCompiler)
+    2. query_compiler (eland.QueryCompiler)
 
     The constructor with 'query_compiler' is for internal use only.
     """
@@ -531,35 +531,11 @@ class DataFrame(NDFrame):
     is_source_field: False
     Mappings:
      capabilities:
-                         _source   es_dtype        pd_dtype  searchable  aggregatable
-    AvgTicketPrice          True      float         float64        True          True
-    Cancelled               True    boolean            bool        True          True
-    Carrier                 True    keyword          object        True          True
-    Dest                    True    keyword          object        True          True
-    DestAirportID           True    keyword          object        True          True
-    DestCityName            True    keyword          object        True          True
-    DestCountry             True    keyword          object        True          True
-    DestLocation            True  geo_point          object        True          True
-    DestRegion              True    keyword          object        True          True
-    DestWeather             True    keyword          object        True          True
-    DistanceKilometers      True      float         float64        True          True
-    DistanceMiles           True      float         float64        True          True
-    FlightDelay             True    boolean            bool        True          True
-    FlightDelayMin          True    integer           int64        True          True
-    FlightDelayType         True    keyword          object        True          True
-    FlightNum               True    keyword          object        True          True
-    FlightTimeHour          True      float         float64        True          True
-    FlightTimeMin           True      float         float64        True          True
-    Origin                  True    keyword          object        True          True
-    OriginAirportID         True    keyword          object        True          True
-    OriginCityName          True    keyword          object        True          True
-    OriginCountry           True    keyword          object        True          True
-    OriginLocation          True  geo_point          object        True          True
-    OriginRegion            True    keyword          object        True          True
-    OriginWeather           True    keyword          object        True          True
-    dayOfWeek               True    integer           int64        True          True
-    timestamp               True       date  datetime64[ns]        True          True
-     date_fields_format: {}
+                       es_field_name  is_source es_dtype es_date_format        pd_dtype  is_searchable  is_aggregatable  is_scripted aggregatable_es_field_name
+    timestamp              timestamp       True     date           None  datetime64[ns]           True             True        False                  timestamp
+    OriginAirportID  OriginAirportID       True  keyword           None          object           True             True        False            OriginAirportID
+    DestAirportID      DestAirportID       True  keyword           None          object           True             True        False              DestAirportID
+    FlightDelayMin    FlightDelayMin       True  integer           None           int64           True             True        False             FlightDelayMin
     Operations:
      tasks: [('boolean_filter': ('boolean_filter': {'bool': {'must': [{'term': {'OriginAirportID': 'AMS'}}, {'range': {'FlightDelayMin': {'gt': 60}}}]}})), ('tail': ('sort_field': '_doc', 'count': 5))]
      size: 5
@@ -567,8 +543,6 @@ class DataFrame(NDFrame):
      _source: ['timestamp', 'OriginAirportID', 'DestAirportID', 'FlightDelayMin']
      body: {'query': {'bool': {'must': [{'term': {'OriginAirportID': 'AMS'}}, {'range': {'FlightDelayMin': {'gt': 60}}}]}}}
      post_processing: [('sort_index')]
-     'field_to_display_names': {}
-     'display_to_field_names': {}
     <BLANKLINE>
     """
     buf = StringIO()
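The docstring above is a doctest of DataFrame.info_es(). As a rough usage sketch (not part of the diff; assumes a local cluster with the 'flights' sample data):

# Sketch only: reproduces the filtered frame whose info_es() output is shown
# in the doctest above.
import eland as ed

df = ed.DataFrame('localhost', 'flights',
                  columns=['timestamp', 'OriginAirportID', 'DestAirportID', 'FlightDelayMin'])
df = df[(df.OriginAirportID == 'AMS') & (df.FlightDelayMin > 60)].tail(5)
print(df.info_es())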
@@ -11,6 +12,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import warnings
 from collections import OrderedDict
 
@@ -18,9 +30,10 @@ import numpy as np
 import pandas as pd
 from pandas.core.dtypes.common import (is_float_dtype, is_bool_dtype, is_integer_dtype, is_datetime_or_timedelta_dtype,
                                        is_string_dtype)
+from pandas.core.dtypes.inference import is_list_like
 
 
-class Mappings:
+class FieldMappings:
     """
     General purpose to manage Elasticsearch to/from pandas mappings
 
@@ -30,26 +43,28 @@ class Mappings:
     _mappings_capabilities: pandas.DataFrame
         A data frame summarising the capabilities of the index mapping
 
-        _source      - is top level field (i.e. not a multi-field sub-field)
-        es_dtype     - Elasticsearch field datatype
-        pd_dtype     - Pandas datatype
-        searchable   - is the field searchable?
-        aggregatable - is the field aggregatable?
-                                        _source  es_dtype  pd_dtype  searchable  aggregatable
-        maps-telemetry.min                 True      long     int64        True          True
-        maps-telemetry.avg                 True     float   float64        True          True
-        city                               True      text    object        True         False
-        user_name                          True   keyword    object        True          True
-        origin_location.lat.keyword       False   keyword    object        True          True
-        type                               True   keyword    object        True          True
-        origin_location.lat                True      text    object        True         False
+        index                       - the eland display name
+
+        es_field_name               - the Elasticsearch field name
+        is_source                   - is top level field (i.e. not a multi-field sub-field)
+        es_dtype                    - Elasticsearch field datatype
+        es_date_format              - Elasticsearch date format (or None)
+        pd_dtype                    - Pandas datatype
+        is_searchable               - is the field searchable?
+        is_aggregatable             - is the field aggregatable?
+        is_scripted                 - is the field a scripted_field?
+        aggregatable_es_field_name  - either es_field_name (if aggregatable),
+                                      or es_field_name.keyword (if exists) or None
     """
 
+    # the labels for each column (display_name is index)
+    column_labels = ['es_field_name', 'is_source', 'es_dtype', 'es_date_format', 'pd_dtype', 'is_searchable',
+                     'is_aggregatable', 'is_scripted', 'aggregatable_es_field_name']
+
     def __init__(self,
                  client=None,
                  index_pattern=None,
-                 mappings=None):
+                 display_names=None):
         """
         Parameters
         ----------
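To make the new capability matrix concrete, here is a toy sketch (not from the diff) built with the same column labels, showing the intended .keyword fallback for a non-aggregatable text field; the field names are illustrative assumptions:

# Sketch only: a hand-built frame in the shape of FieldMappings._mappings_capabilities.
import pandas as pd

column_labels = ['es_field_name', 'is_source', 'es_dtype', 'es_date_format', 'pd_dtype',
                 'is_searchable', 'is_aggregatable', 'is_scripted', 'aggregatable_es_field_name']

capabilities = {
    'customer_full_name': ['customer_full_name', True, 'text', None, 'object',
                           True, False, False, 'customer_full_name.keyword'],
    'total_quantity': ['total_quantity', True, 'integer', None, 'int64',
                       True, True, False, 'total_quantity'],
}

df = pd.DataFrame.from_dict(capabilities, orient='index', columns=column_labels)
print(df[['es_dtype', 'is_aggregatable', 'aggregatable_es_field_name']])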
@@ -59,39 +74,29 @@ class Mappings:
         index_pattern: str
             Elasticsearch index pattern
 
-        Copy constructor arguments
-
-        mappings: Mappings
-            Object to copy
+        display_names: list of str
+            Field names to display
         """
+        if (client is None) or (index_pattern is None):
+            raise ValueError("Can not initialise mapping without client or index_pattern {} {}", client, index_pattern)
+
         # here we keep track of the format of any date fields
         self._date_fields_format = dict()
-        if (client is not None) and (index_pattern is not None):
-            get_mapping = client.get_mapping(index=index_pattern)
+        get_mapping = client.get_mapping(index=index_pattern)
 
-            # Get all fields (including all nested) and then all field_caps
-            all_fields, self._date_fields_format = Mappings._extract_fields_from_mapping(get_mapping)
-            all_fields_caps = client.field_caps(index=index_pattern, fields='*')
+        # Get all fields (including all nested) and then all field_caps
+        all_fields = FieldMappings._extract_fields_from_mapping(get_mapping)
+        all_fields_caps = client.field_caps(index=index_pattern, fields='*')
 
-            # Get top level (not sub-field multifield) mappings
-            source_fields, _ = Mappings._extract_fields_from_mapping(get_mapping, source_only=True)
+        # Get top level (not sub-field multifield) mappings
+        source_fields = FieldMappings._extract_fields_from_mapping(get_mapping, source_only=True)
 
-            # Populate capability matrix of fields
-            # field_name, es_dtype, pd_dtype, is_searchable, is_aggregtable, is_source
-            self._mappings_capabilities = Mappings._create_capability_matrix(all_fields, source_fields, all_fields_caps)
-        else:
-            # straight copy
-            self._mappings_capabilities = mappings._mappings_capabilities.copy()
+        # Populate capability matrix of fields
+        self._mappings_capabilities = FieldMappings._create_capability_matrix(all_fields, source_fields,
+                                                                              all_fields_caps)
 
-        # Cache source field types for efficient lookup
-        # (this massively improves performance of DataFrame.flatten)
-        self._source_field_pd_dtypes = OrderedDict()
-
-        for field_name in self._mappings_capabilities[self._mappings_capabilities._source].index:
-            pd_dtype = self._mappings_capabilities.loc[field_name]['pd_dtype']
-            self._source_field_pd_dtypes[field_name] = pd_dtype
+        if display_names is not None:
+            self.display_names = display_names
 
     @staticmethod
     def _extract_fields_from_mapping(mappings, source_only=False, date_format=None):
@@ -143,7 +148,6 @@ class Mappings:
 
         """
         fields = OrderedDict()
-        dates_format = dict()
 
         # Recurse until we get a 'type: xxx'
        def flatten(x, name=''):
@@ -153,15 +157,16 @@ class Mappings:
                     field_name = name[:-1]
                     field_type = x[a]
                     # if field_type is 'date' keep track of the format info when available
+                    date_format = None
                     if field_type == "date" and "format" in x:
-                        dates_format[field_name] = x["format"]
+                        date_format = x["format"]
                     # If there is a conflicting type, warn - first values added wins
                     if field_name in fields and fields[field_name] != field_type:
                         warnings.warn("Field {} has conflicting types {} != {}".
                                       format(field_name, fields[field_name], field_type),
                                       UserWarning)
                     else:
-                        fields[field_name] = field_type
+                        fields[field_name] = (field_type, date_format)
                 elif a == 'properties' or (not source_only and a == 'fields'):
                     flatten(x[a], name)
                 elif not (source_only and a == 'fields'):  # ignore multi-field fields for source_only
@@ -173,7 +178,7 @@ class Mappings:
 
         flatten(properties)
 
-        return fields, dates_format
+        return fields
 
     @staticmethod
     def _create_capability_matrix(all_fields, source_fields, all_fields_caps):
@@ -206,7 +211,6 @@ class Mappings:
         """
         all_fields_caps_fields = all_fields_caps['fields']
 
-        field_names = ['_source', 'es_dtype', 'pd_dtype', 'searchable', 'aggregatable']
         capability_matrix = OrderedDict()
 
         for field, field_caps in all_fields_caps_fields.items():
@@ -214,12 +218,17 @@ class Mappings:
             # v = {'long': {'type': 'long', 'searchable': True, 'aggregatable': True}}
             for kk, vv in field_caps.items():
                 _source = (field in source_fields)
+                es_field_name = field
                 es_dtype = vv['type']
-                pd_dtype = Mappings._es_dtype_to_pd_dtype(vv['type'])
-                searchable = vv['searchable']
-                aggregatable = vv['aggregatable']
+                es_date_format = all_fields[field][1]
+                pd_dtype = FieldMappings._es_dtype_to_pd_dtype(vv['type'])
+                is_searchable = vv['searchable']
+                is_aggregatable = vv['aggregatable']
+                scripted = False
+                aggregatable_es_field_name = None  # this is populated later
 
-                caps = [_source, es_dtype, pd_dtype, searchable, aggregatable]
+                caps = [es_field_name, _source, es_dtype, es_date_format, pd_dtype, is_searchable, is_aggregatable,
+                        scripted, aggregatable_es_field_name]
 
                 capability_matrix[field] = caps
 
@@ -232,9 +241,34 @@ class Mappings:
                               format(field, vv['non_searchable_indices']),
                               UserWarning)
 
-        capability_matrix_df = pd.DataFrame.from_dict(capability_matrix, orient='index', columns=field_names)
+        capability_matrix_df = pd.DataFrame.from_dict(capability_matrix, orient='index',
+                                                      columns=FieldMappings.column_labels)
 
-        return capability_matrix_df.sort_index()
+        def find_aggregatable(row, df):
+            # convert series to dict so we can add 'aggregatable_es_field_name'
+            row_as_dict = row.to_dict()
+            if row_as_dict['is_aggregatable'] == False:
+                # if not aggregatable, then try field.keyword
+                es_field_name_keyword = row.es_field_name + '.keyword'
+                try:
+                    series = df.loc[df.es_field_name == es_field_name_keyword]
+                    if not series.empty and series.is_aggregatable.squeeze():
+                        row_as_dict['aggregatable_es_field_name'] = es_field_name_keyword
+                    else:
+                        row_as_dict['aggregatable_es_field_name'] = None
+                except KeyError:
+                    row_as_dict['aggregatable_es_field_name'] = None
+            else:
+                row_as_dict['aggregatable_es_field_name'] = row_as_dict['es_field_name']
+
+            return pd.Series(data=row_as_dict)
+
+        # add aggregatable_es_field_name column by applying action to each row
+        capability_matrix_df = capability_matrix_df.apply(find_aggregatable, args=(capability_matrix_df,),
+                                                          axis='columns')
+
+        # return just source fields (as these are the only ones we display)
+        return capability_matrix_df[capability_matrix_df.is_source].sort_index()
 
     @staticmethod
     def _es_dtype_to_pd_dtype(es_dtype):
@@ -352,105 +386,50 @@ class Mappings:
             if geo_points is not None and field_name_name in geo_points:
                 es_dtype = 'geo_point'
             else:
-                es_dtype = Mappings._pd_dtype_to_es_dtype(dtype)
+                es_dtype = FieldMappings._pd_dtype_to_es_dtype(dtype)
 
             mappings['properties'][field_name_name] = OrderedDict()
             mappings['properties'][field_name_name]['type'] = es_dtype
 
         return {"mappings": mappings}
 
-    def all_fields(self):
-        """
-        Returns
-        -------
-        all_fields: list
-            All typed fields in the index mapping
-        """
-        return self._mappings_capabilities.index.tolist()
-
-    def field_capabilities(self, field_name):
-        """
-        Parameters
-        ----------
-        field_name: str
-
-        Returns
-        -------
-        mappings_capabilities: pd.Series with index values:
-            _source: bool
-                Is this field name a top-level source field?
-            ed_dtype: str
-                The Elasticsearch data type
-            pd_dtype: str
-                The pandas data type
-            searchable: bool
-                Is the field searchable in Elasticsearch?
-            aggregatable: bool
-                Is the field aggregatable in Elasticsearch?
-        """
-        try:
-            field_capabilities = self._mappings_capabilities.loc[field_name]
-        except KeyError:
-            field_capabilities = pd.Series()
-        return field_capabilities
-
-    def get_date_field_format(self, field_name):
-        """
-        Parameters
-        ----------
-        field_name: str
-
-        Returns
-        -------
-        str
-            A string (for date fields) containing the date format for the field
-        """
-        return self._date_fields_format.get(field_name)
-
-    def source_field_pd_dtype(self, field_name):
-        """
-        Parameters
-        ----------
-        field_name: str
-
-        Returns
-        -------
-        is_source_field: bool
-            Is this field name a top-level source field?
-        pd_dtype: str
-            The pandas data type we map to
-        """
-        pd_dtype = 'object'
-        is_source_field = False
-
-        if field_name in self._source_field_pd_dtypes:
-            is_source_field = True
-            pd_dtype = self._source_field_pd_dtypes[field_name]
-
-        return is_source_field, pd_dtype
-
-    def is_source_field(self, field_name):
-        """
-        Parameters
-        ----------
-        field_name: str
-
-        Returns
-        -------
-        is_source_field: bool
-            Is this field name a top-level source field?
-        """
-        is_source_field = False
-
-        if field_name in self._source_field_pd_dtypes:
-            is_source_field = True
-
-        return is_source_field
-
-    def aggregatable_field_names(self, field_names=None):
-        """
-        Return a dict of aggregatable field_names from all field_names or field_names list
-        {'customer_full_name': 'customer_full_name.keyword', ...}
+    def aggregatable_field_name(self, display_name):
+        """
+        Return a single aggregatable field_name from display_name
+
+        Logic here is that field_name names are '_source' fields and keyword fields
+        may be nested beneath the field. E.g.
+        customer_full_name: text
+        customer_full_name.keyword: keyword
+
+        customer_full_name.keyword is the aggregatable field for customer_full_name
+
+        Parameters
+        ----------
+        display_name: str
+
+        Returns
+        -------
+        aggregatable_es_field_name: str or None
+            The aggregatable field name associated with display_name. This could be the field_name, or the
+            field_name.keyword.
+
+            raise KeyError if the field_name doesn't exist in the mapping, or isn't aggregatable
+        """
+        if display_name not in self._mappings_capabilities.index:
+            raise KeyError("Can not get aggregatable field name for invalid display name {}".format(display_name))
+
+        if self._mappings_capabilities.loc[display_name].aggregatable_es_field_name is None:
+            warnings.warn("Aggregations not supported for '{}' '{}'".format(display_name,
+                                                                            self._mappings_capabilities.loc[
+                                                                                display_name].es_field_name))
+
+        return self._mappings_capabilities.loc[display_name].aggregatable_es_field_name
+
+    def aggregatable_field_names(self):
+        """
+        Return a list of aggregatable Elasticsearch field_names for all display names.
+        If field is not aggregatable_field_names, return nothing.
 
         Logic here is that field_name names are '_source' fields and keyword fields
         may be nested beneath the field. E.g.
@@ -461,29 +440,83 @@ class Mappings:
 
         Returns
         -------
-        OrderedDict
-            e.g. {'customer_full_name': 'customer_full_name.keyword', ...}
+        Dict of aggregatable_field_names
+            key = aggregatable_field_name, value = field_name
+            e.g. {'customer_full_name.keyword': 'customer_full_name', ...}
         """
-        if field_names is None:
-            field_names = self.source_fields()
-        aggregatables = OrderedDict()
-        for field_name in field_names:
-            capabilities = self.field_capabilities(field_name)
-            if capabilities['aggregatable']:
-                aggregatables[field_name] = field_name
-            else:
-                # Try 'field_name.keyword'
-                field_name_keyword = field_name + '.keyword'
-                capabilities = self.field_capabilities(field_name_keyword)
-                if not capabilities.empty and capabilities.get('aggregatable'):
-                    aggregatables[field_name_keyword] = field_name
-
-        if not aggregatables:
-            raise ValueError("Aggregations not supported for ", field_names)
-
-        return aggregatables
+        non_aggregatables = self._mappings_capabilities[self._mappings_capabilities.aggregatable_es_field_name.isna()]
+        if not non_aggregatables.empty:
+            warnings.warn("Aggregations not supported for '{}'".format(non_aggregatables))
+
+        aggregatables = self._mappings_capabilities[self._mappings_capabilities.aggregatable_es_field_name.notna()]
+
+        # extract relevant fields and convert to dict
+        # <class 'dict'>: {'category.keyword': 'category', 'currency': 'currency', ...
+        return OrderedDict(aggregatables[['aggregatable_es_field_name', 'es_field_name']].to_dict(orient='split')['data'])
 
-    def numeric_source_fields(self, field_names, include_bool=True):
+    def get_date_field_format(self, es_field_name):
+        """
+        Parameters
+        ----------
+        es_field_name: str
+
+        Returns
+        -------
+        str
+            A string (for date fields) containing the date format for the field
+        """
+        return self._mappings_capabilities.loc[
+            self._mappings_capabilities.es_field_name == es_field_name].es_date_format.squeeze()
+
+    def field_name_pd_dtype(self, es_field_name):
+        """
+        Parameters
+        ----------
+        es_field_name: str
+
+        Returns
+        -------
+        pd_dtype: str
+            The pandas data type we map to
+
+        Raises
+        ------
+        KeyError
+            If es_field_name does not exist in mapping
+        """
+        if es_field_name not in self._mappings_capabilities.es_field_name:
+            raise KeyError("es_field_name {} does not exist".format(es_field_name))
+
+        pd_dtype = self._mappings_capabilities.loc[
+            self._mappings_capabilities.es_field_name == es_field_name
+        ].pd_dtype.squeeze()
+        return pd_dtype
+
+    def add_scripted_field(self, scripted_field_name, display_name, pd_dtype):
+        # if this display name is used somewhere else, drop it
+        if display_name in self._mappings_capabilities.index:
+            self._mappings_capabilities = self._mappings_capabilities.drop(index=[display_name])
+
+        # ['es_field_name', 'is_source', 'es_dtype', 'es_date_format', 'pd_dtype', 'is_searchable',
+        #  'is_aggregatable', 'is_scripted', 'aggregatable_es_field_name']
+        capabilities = {display_name: [scripted_field_name,
+                                       False,
+                                       self._pd_dtype_to_es_dtype(pd_dtype),
+                                       None,
+                                       pd_dtype,
+                                       True,
+                                       True,
+                                       True,
+                                       scripted_field_name]}
+
+        capability_matrix_row = pd.DataFrame.from_dict(capabilities, orient='index',
+                                                       columns=FieldMappings.column_labels)
+
+        self._mappings_capabilities = self._mappings_capabilities.append(capability_matrix_row)
+
+    def numeric_source_fields(self, include_bool=True):
         """
         Returns
         -------
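The OrderedDict construction at the end of the new aggregatable_field_names() is worth a standalone illustration. A sketch (not from the diff) with assumed field names:

# Sketch only: to_dict(orient='split')['data'] yields the two columns as
# [aggregatable_es_field_name, es_field_name] pairs, which OrderedDict turns into
# {'customer_full_name.keyword': 'customer_full_name', 'total_quantity': 'total_quantity'}.
from collections import OrderedDict

import pandas as pd

aggregatables = pd.DataFrame(
    {'aggregatable_es_field_name': ['customer_full_name.keyword', 'total_quantity'],
     'es_field_name': ['customer_full_name', 'total_quantity']},
    index=['customer_full_name', 'total_quantity'])

mapping = OrderedDict(aggregatables[['aggregatable_es_field_name', 'es_field_name']]
                      .to_dict(orient='split')['data'])
print(mapping)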
@@ -491,60 +524,83 @@ class Mappings:
             List of source fields where pd_dtype == (int64 or float64 or bool)
         """
         if include_bool:
-            df = self._mappings_capabilities[self._mappings_capabilities._source &
-                                             ((self._mappings_capabilities.pd_dtype == 'int64') |
+            df = self._mappings_capabilities[((self._mappings_capabilities.pd_dtype == 'int64') |
                                               (self._mappings_capabilities.pd_dtype == 'float64') |
                                               (self._mappings_capabilities.pd_dtype == 'bool'))]
         else:
-            df = self._mappings_capabilities[self._mappings_capabilities._source &
-                                             ((self._mappings_capabilities.pd_dtype == 'int64') |
+            df = self._mappings_capabilities[((self._mappings_capabilities.pd_dtype == 'int64') |
                                               (self._mappings_capabilities.pd_dtype == 'float64'))]
-        # if field_names exists, filter index with field_names
-        if field_names is not None:
-            # reindex adds NA for non-existing field_names (non-numeric), so drop these after reindex
-            df = df.reindex(field_names)
-            df.dropna(inplace=True)
 
-        # return as list
-        return df.index.to_list()
+        # return as list for display names (in display_name order)
+        return df.es_field_name.to_list()
 
-    def source_fields(self):
-        """
-        Returns
-        -------
-        source_fields: list of str
-            List of source fields
-        """
-        return self._source_field_pd_dtypes.keys()
-
-    def count_source_fields(self):
-        """
-        Returns
-        -------
-        count_source_fields: int
-            Number of source fields in mapping
-        """
-        return len(self._source_field_pd_dtypes)
-
-    def dtypes(self, field_names=None):
+    def get_field_names(self, include_scripted_fields=True):
+        if include_scripted_fields:
+            return self._mappings_capabilities.es_field_name.to_list()
+
+        return self._mappings_capabilities[
+            self._mappings_capabilities.is_scripted == False
+        ].es_field_name.to_list()
+
+    def _get_display_names(self):
+        return self._mappings_capabilities.index.to_list()
+
+    def _set_display_names(self, display_names):
+        if not is_list_like(display_names):
+            raise ValueError("'{}' is not list like".format(display_names))
+
+        if list(set(display_names) - set(self.display_names)):
+            raise KeyError("{} not in display names {}".format(display_names, self.display_names))
+
+        self._mappings_capabilities = self._mappings_capabilities.reindex(display_names)
+
+    display_names = property(_get_display_names, _set_display_names)
+
+    def dtypes(self):
         """
         Returns
         -------
         dtypes: pd.Series
-            Source field name + pd_dtype as np.dtype
+            Index: Display name
+            Values: pd_dtype as np.dtype
         """
-        if field_names is not None:
-            data = OrderedDict()
-            for key in field_names:
-                data[key] = np.dtype(self._source_field_pd_dtypes[key])
-            return pd.Series(data)
-
-        data = OrderedDict()
-        for key, value in self._source_field_pd_dtypes.items():
-            data[key] = np.dtype(value)
-        return pd.Series(data)
+        pd_dtypes = self._mappings_capabilities['pd_dtype']
+
+        # Set name of the returned series as None
+        pd_dtypes.name = None
+
+        # Convert return from 'str' to 'np.dtype'
+        return pd_dtypes.apply(lambda x: np.dtype(x))
 
     def info_es(self, buf):
         buf.write("Mappings:\n")
-        buf.write(" capabilities:\n{}\n".format(self._mappings_capabilities.to_string()))
-        buf.write(" date_fields_format: {}\n".format(self._date_fields_format))
+        buf.write(" capabilities:\n{0}\n".format(self._mappings_capabilities.to_string()))
+
+    def rename(self, old_name_new_name_dict):
+        """
+        Renames display names in-place
+
+        Parameters
+        ----------
+        old_name_new_name_dict
+
+        Returns
+        -------
+        Nothing
+
+        Notes
+        -----
+        For the names that do not exist this is a no op
+        """
+        self._mappings_capabilities = self._mappings_capabilities.rename(index=old_name_new_name_dict)
+
+    def get_renames(self):
+        # return dict of renames { old_name: new_name, ... } (inefficient)
+        renames = {}
+
+        for display_name in self.display_names:
+            field_name = self._mappings_capabilities.loc[display_name].es_field_name
+            if field_name != display_name:
+                renames[field_name] = display_name
+
+        return renames
@@ -60,7 +60,7 @@ class NDFrame(ABC):
             A reference to a Elasticsearch python client
         """
         if query_compiler is None:
-            query_compiler = QueryCompiler(client=client, index_pattern=index_pattern, field_names=columns,
+            query_compiler = QueryCompiler(client=client, index_pattern=index_pattern, display_names=columns,
                                            index_field=index_field)
         self._query_compiler = query_compiler
 
@@ -11,10 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
 import copy
-from collections import OrderedDict
 import warnings
+from collections import OrderedDict
 
 import pandas as pd
 
@@ -38,18 +37,19 @@ class Operations:
     (see https://docs.dask.org/en/latest/spec.html)
     """
 
-    def __init__(self, tasks=None, field_names=None):
+    def __init__(self, tasks=None, arithmetic_op_fields_task=None):
         if tasks is None:
             self._tasks = []
         else:
             self._tasks = tasks
-        self._field_names = field_names
+        self._arithmetic_op_fields_task = arithmetic_op_fields_task
 
     def __constructor__(self, *args, **kwargs):
         return type(self)(*args, **kwargs)
 
     def copy(self):
-        return self.__constructor__(tasks=copy.deepcopy(self._tasks), field_names=copy.deepcopy(self._field_names))
+        return self.__constructor__(tasks=copy.deepcopy(self._tasks),
+                                    arithmetic_op_fields_task=copy.deepcopy(self._arithmetic_op_fields_task))
 
     def head(self, index, n):
         # Add a task that is an ascending sort with size=n
@@ -61,29 +61,21 @@ class Operations:
         task = TailTask(index.sort_field, n)
         self._tasks.append(task)
 
-    def arithmetic_op_fields(self, field_name, op_name, left_field, right_field, op_type=None):
-        # Set this as a column we want to retrieve
-        self.set_field_names([field_name])
-
-        task = ArithmeticOpFieldsTask(field_name, op_name, left_field, right_field, op_type)
-        self._tasks.append(task)
-
-    def set_field_names(self, field_names):
-        if not isinstance(field_names, list):
-            field_names = list(field_names)
-
-        self._field_names = field_names
-
-        return self._field_names
-
-    def get_field_names(self):
-        return self._field_names
+    def arithmetic_op_fields(self, display_name, arithmetic_series):
+        if self._arithmetic_op_fields_task is None:
+            self._arithmetic_op_fields_task = ArithmeticOpFieldsTask(display_name, arithmetic_series)
+        else:
+            self._arithmetic_op_fields_task.update(display_name, arithmetic_series)
+
+    def get_arithmetic_op_fields(self):
+        # get an ArithmeticOpFieldsTask if it exists
+        return self._arithmetic_op_fields_task
 
     def __repr__(self):
         return repr(self._tasks)
 
     def count(self, query_compiler):
-        query_params, post_processing = self._resolve_tasks()
+        query_params, post_processing = self._resolve_tasks(query_compiler)
 
         # Elasticsearch _count is very efficient and so used to return results here. This means that
         # data frames that have restricted size or sort params will not return valid results
@@ -95,7 +87,7 @@ class Operations:
                                       .format(query_params, post_processing))
 
         # Only return requested field_names
-        fields = query_compiler.field_names
+        fields = query_compiler.get_field_names(include_scripted_fields=False)
 
         counts = OrderedDict()
         for field in fields:
@@ -142,45 +134,58 @@ class Operations:
         pandas.Series
             Series containing results of `func` applied to the field_name(s)
         """
-        query_params, post_processing = self._resolve_tasks()
+        query_params, post_processing = self._resolve_tasks(query_compiler)
 
         size = self._size(query_params, post_processing)
         if size is not None:
             raise NotImplementedError("Can not count field matches if size is set {}".format(size))
 
-        field_names = self.get_field_names()
-
         body = Query(query_params['query'])
 
+        results = OrderedDict()
+
         # some metrics aggs (including cardinality) work on all aggregatable fields
         # therefore we include an optional all parameter on operations
         # that call _metric_aggs
         if field_types == 'aggregatable':
-            source_fields = query_compiler._mappings.aggregatable_field_names(field_names)
-        else:
-            source_fields = query_compiler._mappings.numeric_source_fields(field_names)
+            aggregatable_field_names = query_compiler._mappings.aggregatable_field_names()
 
-        for field in source_fields:
-            body.metric_aggs(field, func, field)
+            for field in aggregatable_field_names.keys():
+                body.metric_aggs(field, func, field)
 
-        response = query_compiler._client.search(
-            index=query_compiler._index_pattern,
-            size=0,
-            body=body.to_search_body())
+            response = query_compiler._client.search(
+                index=query_compiler._index_pattern,
+                size=0,
+                body=body.to_search_body())
 
-        # Results are of the form
-        # "aggregations" : {
-        #   "AvgTicketPrice" : {
-        #     "value" : 628.2536888148849
-        #   }
-        # }
-        results = OrderedDict()
-
-        if field_types == 'aggregatable':
-            for key, value in source_fields.items():
+            # Results are of the form
+            # "aggregations" : {
+            #   "customer_full_name.keyword" : {
+            #     "value" : 10
+            #   }
+            # }
+
+            # map aggregatable (e.g. x.keyword) to field_name
+            for key, value in aggregatable_field_names.items():
                 results[value] = response['aggregations'][key]['value']
         else:
-            for field in source_fields:
+            numeric_source_fields = query_compiler._mappings.numeric_source_fields()
+
+            for field in numeric_source_fields:
+                body.metric_aggs(field, func, field)
+
+            response = query_compiler._client.search(
+                index=query_compiler._index_pattern,
+                size=0,
+                body=body.to_search_body())
+
+            # Results are of the form
+            # "aggregations" : {
+            #   "AvgTicketPrice" : {
+            #     "value" : 628.2536888148849
+            #   }
+            # }
+            for field in numeric_source_fields:
                 results[field] = response['aggregations'][field]['value']
 
         # Return single value if this is a series
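For reference, a sketch (not from the diff) of the search body that the aggregatable branch above ends up sending; the field names are illustrative assumptions, and cardinality is one of the metrics that works on any aggregatable field:

# Sketch only: one metric agg per aggregatable field, keyed by the (possibly
# .keyword) aggregatable name, with size=0 so Elasticsearch returns only the
# aggregation values shown in the comments above.
body = {
    "size": 0,
    "aggs": {
        "customer_full_name.keyword": {"cardinality": {"field": "customer_full_name.keyword"}},
        "total_quantity": {"cardinality": {"field": "total_quantity"}},
    },
}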
@@ -202,16 +207,14 @@ class Operations:
         pandas.Series
             Series containing results of `func` applied to the field_name(s)
         """
-        query_params, post_processing = self._resolve_tasks()
+        query_params, post_processing = self._resolve_tasks(query_compiler)
 
         size = self._size(query_params, post_processing)
         if size is not None:
             raise NotImplementedError("Can not count field matches if size is set {}".format(size))
 
-        field_names = self.get_field_names()
-
         # Get just aggregatable field_names
-        aggregatable_field_names = query_compiler._mappings.aggregatable_field_names(field_names)
+        aggregatable_field_names = query_compiler._mappings.aggregatable_field_names()
 
         body = Query(query_params['query'])
 
@@ -232,7 +235,8 @@ class Operations:
             results[bucket['key']] = bucket['doc_count']
 
         try:
-            name = field_names[0]
+            # get first value in dict (key is .keyword)
+            name = list(aggregatable_field_names.values())[0]
         except IndexError:
             name = None
 
@@ -242,15 +246,13 @@ class Operations:
 
     def _hist_aggs(self, query_compiler, num_bins):
         # Get histogram bins and weights for numeric field_names
-        query_params, post_processing = self._resolve_tasks()
+        query_params, post_processing = self._resolve_tasks(query_compiler)
 
         size = self._size(query_params, post_processing)
         if size is not None:
             raise NotImplementedError("Can not count field matches if size is set {}".format(size))
 
-        field_names = self.get_field_names()
-
-        numeric_source_fields = query_compiler._mappings.numeric_source_fields(field_names)
+        numeric_source_fields = query_compiler._mappings.numeric_source_fields()
 
         body = Query(query_params['query'])
 
@@ -300,9 +302,9 @@ class Operations:
             # in case of dataframe, throw warning that field is excluded
             if not response['aggregations'].get(field):
                 warnings.warn("{} has no meaningful histogram interval and will be excluded. "
                               "All values 0."
                               .format(field),
                               UserWarning)
                 continue
 
             buckets = response['aggregations'][field]['buckets']
@@ -396,13 +398,13 @@ class Operations:
         return ed_aggs
 
     def aggs(self, query_compiler, pd_aggs):
-        query_params, post_processing = self._resolve_tasks()
+        query_params, post_processing = self._resolve_tasks(query_compiler)
 
         size = self._size(query_params, post_processing)
         if size is not None:
             raise NotImplementedError("Can not count field matches if size is set {}".format(size))
 
-        field_names = self.get_field_names()
+        field_names = query_compiler.get_field_names(include_scripted_fields=False)
 
         body = Query(query_params['query'])
 
@@ -446,15 +448,13 @@ class Operations:
         return df
 
     def describe(self, query_compiler):
-        query_params, post_processing = self._resolve_tasks()
+        query_params, post_processing = self._resolve_tasks(query_compiler)
 
         size = self._size(query_params, post_processing)
         if size is not None:
             raise NotImplementedError("Can not count field matches if size is set {}".format(size))
 
-        field_names = self.get_field_names()
-
-        numeric_source_fields = query_compiler._mappings.numeric_source_fields(field_names, include_bool=False)
+        numeric_source_fields = query_compiler._mappings.numeric_source_fields(include_bool=False)
 
         # for each field we compute:
         # count, mean, std, min, 25%, 50%, 75%, max
@@ -510,8 +510,8 @@ class Operations:

     def to_csv(self, query_compiler, **kwargs):
         class PandasToCSVCollector:
-            def __init__(self, **kwargs):
-                self.kwargs = kwargs
+            def __init__(self, **args):
+                self.args = args
                 self.ret = None
                 self.first_time = True

@@ -520,12 +520,12 @@ class Operations:
                 # and append results
                 if self.first_time:
                     self.first_time = False
-                    df.to_csv(**self.kwargs)
+                    df.to_csv(**self.args)
                 else:
                     # Don't write header, and change mode to append
-                    self.kwargs['header'] = False
-                    self.kwargs['mode'] = 'a'
-                    df.to_csv(**self.kwargs)
+                    self.args['header'] = False
+                    self.args['mode'] = 'a'
+                    df.to_csv(**self.args)

             @staticmethod
             def batch_size():

@@ -540,7 +540,7 @@ class Operations:
         return collector.ret

     def _es_results(self, query_compiler, collector):
-        query_params, post_processing = self._resolve_tasks()
+        query_params, post_processing = self._resolve_tasks(query_compiler)

         size, sort_params = Operations._query_params_to_size_and_sort(query_params)

@@ -552,7 +552,9 @@ class Operations:
             body['script_fields'] = script_fields

         # Only return requested field_names
-        field_names = self.get_field_names()
+        _source = query_compiler.get_field_names(include_scripted_fields=False)
+        if not _source:
+            _source = False

         es_results = None

@@ -567,7 +569,7 @@ class Operations:
                     size=size,
                     sort=sort_params,
                     body=body,
-                    _source=field_names)
+                    _source=_source)
            except Exception:
                 # Catch all ES errors and print debug (currently to stdout)
                 error = {
@@ -575,7 +577,7 @@ class Operations:
                     'size': size,
                     'sort': sort_params,
                     'body': body,
-                    '_source': field_names
+                    '_source': _source
                 }
                 print("Elasticsearch error:", error)
                 raise
@@ -584,7 +586,7 @@ class Operations:
             es_results = query_compiler._client.scan(
                 index=query_compiler._index_pattern,
                 query=body,
-                _source=field_names)
+                _source=_source)
             # create post sort
             if sort_params is not None:
                 post_processing.append(SortFieldAction(sort_params))
@@ -603,7 +605,7 @@ class Operations:

     def index_count(self, query_compiler, field):
         # field is the index field so count values
-        query_params, post_processing = self._resolve_tasks()
+        query_params, post_processing = self._resolve_tasks(query_compiler)

         size = self._size(query_params, post_processing)

@@ -617,12 +619,12 @@ class Operations:

         return query_compiler._client.count(index=query_compiler._index_pattern, body=body.to_count_body())

-    def _validate_index_operation(self, items):
+    def _validate_index_operation(self, query_compiler, items):
         if not isinstance(items, list):
             raise TypeError("list item required - not {}".format(type(items)))

         # field is the index field so count values
-        query_params, post_processing = self._resolve_tasks()
+        query_params, post_processing = self._resolve_tasks(query_compiler)

         size = self._size(query_params, post_processing)

@@ -633,7 +635,7 @@ class Operations:
         return query_params, post_processing

     def index_matches_count(self, query_compiler, field, items):
-        query_params, post_processing = self._validate_index_operation(items)
+        query_params, post_processing = self._validate_index_operation(query_compiler, items)

         body = Query(query_params['query'])

@@ -645,7 +647,7 @@ class Operations:
         return query_compiler._client.count(index=query_compiler._index_pattern, body=body.to_count_body())

     def drop_index_values(self, query_compiler, field, items):
-        self._validate_index_operation(items)
+        self._validate_index_operation(query_compiler, items)

         # Putting boolean queries together
         # i = 10
@@ -689,7 +691,7 @@ class Operations:

         return df

-    def _resolve_tasks(self):
+    def _resolve_tasks(self, query_compiler):
         # We now try and combine all tasks into an Elasticsearch query
         # Some operations can be simply combined into a single query
         # other operations require pre-queries and then combinations
@@ -704,7 +706,11 @@ class Operations:
         post_processing = []

         for task in self._tasks:
-            query_params, post_processing = task.resolve_task(query_params, post_processing)
+            query_params, post_processing = task.resolve_task(query_params, post_processing, query_compiler)

+        if self._arithmetic_op_fields_task is not None:
+            query_params, post_processing = self._arithmetic_op_fields_task.resolve_task(query_params, post_processing,
+                                                                                         query_compiler)

         return query_params, post_processing

@@ -722,13 +728,13 @@ class Operations:
         # This can return None
         return size

-    def info_es(self, buf):
+    def info_es(self, query_compiler, buf):
         buf.write("Operations:\n")
         buf.write(" tasks: {0}\n".format(self._tasks))

-        query_params, post_processing = self._resolve_tasks()
+        query_params, post_processing = self._resolve_tasks(query_compiler)
         size, sort_params = Operations._query_params_to_size_and_sort(query_params)
-        field_names = self.get_field_names()
+        _source = query_compiler._mappings.get_field_names()

         script_fields = query_params['query_script_fields']
         query = Query(query_params['query'])
@@ -738,7 +744,7 @@ class Operations:

         buf.write(" size: {0}\n".format(size))
         buf.write(" sort_params: {0}\n".format(sort_params))
-        buf.write(" _source: {0}\n".format(field_names))
+        buf.write(" _source: {0}\n".format(_source))
         buf.write(" body: {0}\n".format(body))
         buf.write(" post_processing: {0}\n".format(post_processing))
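Note (not part of the diff): with the `_es_results` changes above, scripted fields created by arithmetic operations travel in `script_fields` while plain mapped fields go in `_source`. A rough sketch of the kind of search body this produces — the field name, script source and query are illustrative assumptions, not values taken from this change:

# Illustrative only; names and values are assumed for the example.
search_body = {
    "query": {"match_all": {}},
    "script_fields": {
        "script_field_total": {
            "script": {"source": "doc['taxful_total_price'].value + 1.5"}
        }
    },
}
# _source lists only the non-scripted field names the user selected,
# and is set to False when every selected column is a scripted field.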
@@ -21,20 +21,15 @@ from eland.filter import BooleanFilter, NotNull, IsNull, IsIn
 class Query:
     """
     Simple class to manage building Elasticsearch queries.

-    Specifically, this

     """

     def __init__(self, query=None):
         if query is None:
             self._query = BooleanFilter()
-            self._script_fields = {}
             self._aggs = {}
         else:
             # Deep copy the incoming query so we can change it
             self._query = deepcopy(query._query)
-            self._script_fields = deepcopy(query._script_fields)
             self._aggs = deepcopy(query._aggs)

     def exists(self, field, must=True):
@@ -182,13 +177,5 @@ class Query:
         else:
             self._query = self._query & boolean_filter

-    def arithmetic_op_fields(self, op_name, left_field, right_field):
-        if self._script_fields.empty():
-            body = None
-        else:
-            body = {"query": self._script_fields.build()}
-
-        return body
-
     def __repr__(self):
         return repr(self.to_search_body())

@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import copy
 import warnings
 from collections import OrderedDict
 from typing import Union
@@ -20,8 +20,8 @@ import numpy as np
 import pandas as pd

 from eland import Client
+from eland import FieldMappings
 from eland import Index
-from eland import Mappings
 from eland import Operations

@@ -54,72 +54,58 @@ class QueryCompiler:
     A way to mitigate this would be to post process this drop - TODO
     """

-    def __init__(self, client=None, index_pattern=None, field_names=None, index_field=None, operations=None,
-                 name_mapper=None):
-        self._client = Client(client)
-        self._index_pattern = index_pattern
-
-        # Get and persist mappings, this allows us to correctly
-        # map returned types from Elasticsearch to pandas datatypes
-        self._mappings = Mappings(client=self._client, index_pattern=self._index_pattern)
-
-        self._index = Index(self, index_field)
-
-        if operations is None:
+    def __init__(self,
+                 client=None,
+                 index_pattern=None,
+                 display_names=None,
+                 index_field=None,
+                 to_copy=None):
+        # Implement copy as we don't deep copy the client
+        if to_copy is not None:
+            self._client = Client(to_copy._client)
+            self._index_pattern = to_copy._index_pattern
+            self._index = Index(self, to_copy._index.index_field)
+            self._operations = copy.deepcopy(to_copy._operations)
+            self._mappings = copy.deepcopy(to_copy._mappings)
+        else:
+            self._client = Client(client)
+            self._index_pattern = index_pattern
+            # Get and persist mappings, this allows us to correctly
+            # map returned types from Elasticsearch to pandas datatypes
+            self._mappings = FieldMappings(client=self._client, index_pattern=self._index_pattern,
+                                           display_names=display_names)
+            self._index = Index(self, index_field)
             self._operations = Operations()
-        else:
-            self._operations = operations
-
-        if field_names is not None:
-            self.field_names = field_names
-
-        if name_mapper is None:
-            self._name_mapper = QueryCompiler.DisplayNameToFieldNameMapper()
-        else:
-            self._name_mapper = name_mapper

-    def _get_index(self):
+    @property
+    def index(self):
         return self._index

-    def _get_field_names(self):
-        field_names = self._operations.get_field_names()
-        if field_names is None:
-            # default to all
-            field_names = self._mappings.source_fields()
-
-        return pd.Index(field_names)
-
-    def _set_field_names(self, field_names):
-        self._operations.set_field_names(field_names)
-
-    field_names = property(_get_field_names, _set_field_names)
-
-    def _get_columns(self):
-        columns = self._operations.get_field_names()
-        if columns is None:
-            # default to all
-            columns = self._mappings.source_fields()
-
-        # map renames
-        columns = self._name_mapper.field_to_display_names(columns)
+    @property
+    def columns(self):
+        columns = self._mappings.display_names

         return pd.Index(columns)

-    def _set_columns(self, columns):
-        # map renames
-        columns = self._name_mapper.display_to_field_names(columns)
-
-        self._operations.set_field_names(columns)
-
-    columns = property(_get_columns, _set_columns)
-
-    index = property(_get_index)
+    def _get_display_names(self):
+        display_names = self._mappings.display_names
+
+        return pd.Index(display_names)
+
+    def _set_display_names(self, display_names):
+        self._mappings.display_names = display_names
+
+    def get_field_names(self, include_scripted_fields):
+        return self._mappings.get_field_names(include_scripted_fields)
+
+    def add_scripted_field(self, scripted_field_name, display_name, pd_dtype):
+        result = self.copy()
+        self._mappings.add_scripted_field(scripted_field_name, display_name, pd_dtype)
+        return result

     @property
     def dtypes(self):
-        columns = self._operations.get_field_names()
-
-        return self._mappings.dtypes(columns)
+        return self._mappings.dtypes()

     # END Index, columns, and dtypes objects
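Note (not part of the diff): the reworked constructor above doubles as a copy constructor, which is what lets `copy()` become a one-liner later in the file. A rough sketch of the two call paths, using the parameter names from this hunk; the client and index pattern are illustrative assumptions:

# Fresh compiler - field mappings are resolved from the index mapping.
qc = QueryCompiler(client=es_client, index_pattern="flights", display_names=None, index_field="_id")

# Copy - the client is shared, operations and field mappings are deep-copied.
qc_copy = QueryCompiler(to_copy=qc)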
@@ -231,7 +217,10 @@ class QueryCompiler:
         for hit in iterator:
             i = i + 1

-            row = hit['_source']
+            if '_source' in hit:
+                row = hit['_source']
+            else:
+                row = {}

             # script_fields appear in 'fields'
             if 'fields' in hit:
@@ -260,15 +249,14 @@ class QueryCompiler:
         # _source may not contain all field_names in the mapping
         # therefore, fill in missing field_names
         # (note this returns self.field_names NOT IN df.columns)
-        missing_field_names = list(set(self.field_names) - set(df.columns))
+        missing_field_names = list(set(self.get_field_names(include_scripted_fields=True)) - set(df.columns))

         for missing in missing_field_names:
-            is_source_field, pd_dtype = self._mappings.source_field_pd_dtype(missing)
+            pd_dtype = self._mappings.field_name_pd_dtype(missing)
             df[missing] = pd.Series(dtype=pd_dtype)

         # Rename columns
-        if not self._name_mapper.empty:
-            df.rename(columns=self._name_mapper.display_names_mapper(), inplace=True)
+        df.rename(columns=self._mappings.get_renames(), inplace=True)

         # Sort columns in mapping order
         if len(self.columns) > 1:
@@ -286,7 +274,11 @@ class QueryCompiler:
                 is_source_field = False
                 pd_dtype = 'object'
             else:
-                is_source_field, pd_dtype = self._mappings.source_field_pd_dtype(name[:-1])
+                try:
+                    pd_dtype = self._mappings.field_name_pd_dtype(name[:-1])
+                    is_source_field = True
+                except KeyError:
+                    is_source_field = False

             if not is_source_field and type(x) is dict:
                 for a in x:
@@ -349,15 +341,6 @@ class QueryCompiler:
         """
         return self._operations.index_matches_count(self, self.index.index_field, items)

-    def _index_matches(self, items):
-        """
-        Returns
-        -------
-        index_count: int
-            Count of list of the items that match
-        """
-        return self._operations.index_matches(self, self.index.index_field, items)
-
     def _empty_pd_ef(self):
         # Return an empty dataframe with correct columns and dtypes
         df = pd.DataFrame()
@@ -366,17 +349,15 @@ class QueryCompiler:
         return df

     def copy(self):
-        return QueryCompiler(client=self._client, index_pattern=self._index_pattern, field_names=None,
-                             index_field=self._index.index_field, operations=self._operations.copy(),
-                             name_mapper=self._name_mapper.copy())
+        return QueryCompiler(to_copy=self)

     def rename(self, renames, inplace=False):
         if inplace:
-            self._name_mapper.rename_display_name(renames)
+            self._mappings.rename(renames)
             return self
         else:
             result = self.copy()
-            result._name_mapper.rename_display_name(renames)
+            result._mappings.rename(renames)
             return result

     def head(self, n):
@@ -428,7 +409,7 @@ class QueryCompiler:
         if numeric:
             raise NotImplementedError("Not implemented yet...")

-        result._operations.set_field_names(list(key))
+        result._mappings.display_names = list(key)

         return result

@@ -439,7 +420,7 @@ class QueryCompiler:
         if columns is not None:
             # columns is a pandas.Index so we can use pandas drop feature
             new_columns = self.columns.drop(columns)
-            result._operations.set_field_names(new_columns.to_list())
+            result._mappings.display_names = new_columns.to_list()

         if index is not None:
             result._operations.drop_index_values(self, self.index.index_field, index)
@@ -475,8 +456,7 @@ class QueryCompiler:

         self._index.info_es(buf)
         self._mappings.info_es(buf)
-        self._operations.info_es(buf)
-        self._name_mapper.info_es(buf)
+        self._operations.info_es(self, buf)

     def describe(self):
         return self._operations.describe(self)
@@ -533,140 +513,26 @@ class QueryCompiler:
                 "{0} != {1}".format(self._index_pattern, right._index_pattern)
             )

-    def check_str_arithmetics(self, right, self_field, right_field):
-        """
-        In the case of string arithmetics, we need an additional check to ensure that the
-        selected fields are aggregatable.
-
-        Parameters
-        ----------
-        right: QueryCompiler
-            The query compiler to compare self to
-
-        Raises
-        ------
-        TypeError, ValueError
-            If string arithmetic operations aren't possible
-        """
-
-        # only check compatibility if right is an ElandQueryCompiler
-        # else return the raw string as the new field name
-        right_agg = {right_field: right_field}
-        if right:
-            self.check_arithmetics(right)
-            right_agg = right._mappings.aggregatable_field_names([right_field])
-
-        self_agg = self._mappings.aggregatable_field_names([self_field])
-
-        if self_agg and right_agg:
-            return list(self_agg.keys())[0], list(right_agg.keys())[0]
-
-        else:
-            raise ValueError(
-                "Can not perform arithmetic operations on non aggregatable fields"
-                "One of [{}, {}] is not aggregatable.".format(self_field, right_field)
-            )
-
-    def arithmetic_op_fields(self, new_field_name, op, left_field, right_field, op_type=None):
+    def arithmetic_op_fields(self, display_name, arithmetic_object):
         result = self.copy()

-        result._operations.arithmetic_op_fields(new_field_name, op, left_field, right_field, op_type)
+        # create a new field name for this display name
+        scripted_field_name = "script_field_{}".format(display_name)
+
+        # add scripted field
+        result._mappings.add_scripted_field(scripted_field_name, display_name, arithmetic_object.dtype.name)
+
+        result._operations.arithmetic_op_fields(scripted_field_name, arithmetic_object)

         return result

-    """
-    Internal class to deal with column renaming and script_fields
-    """
-    class DisplayNameToFieldNameMapper:
-        def __init__(self,
-                     field_to_display_names=None,
-                     display_to_field_names=None):
-
-            if field_to_display_names is not None:
-                self._field_to_display_names = field_to_display_names
-            else:
-                self._field_to_display_names = {}
-
-            if display_to_field_names is not None:
-                self._display_to_field_names = display_to_field_names
-            else:
-                self._display_to_field_names = {}
-
-        def rename_display_name(self, renames):
-            for current_display_name, new_display_name in renames.items():
-                if current_display_name in self._display_to_field_names:
-                    # has been renamed already - update name
-                    field_name = self._display_to_field_names[current_display_name]
-                    del self._display_to_field_names[current_display_name]
-                    del self._field_to_display_names[field_name]
-                    self._display_to_field_names[new_display_name] = field_name
-                    self._field_to_display_names[field_name] = new_display_name
-                else:
-                    # new rename - assume 'current_display_name' is 'field_name'
-                    field_name = current_display_name
-
-                    # if field_name is already mapped ignore
-                    if field_name not in self._field_to_display_names:
-                        self._display_to_field_names[new_display_name] = field_name
-                        self._field_to_display_names[field_name] = new_display_name
-
-        def field_names_to_list(self):
-            return sorted(list(self._field_to_display_names.keys()))
-
-        def display_names_to_list(self):
-            return sorted(list(self._display_to_field_names.keys()))
-
-        # Return mapper values as dict
-        def display_names_mapper(self):
-            return self._field_to_display_names
-
-        @property
-        def empty(self):
-            return not self._display_to_field_names
-
-        def field_to_display_names(self, field_names):
-            if self.empty:
-                return field_names
-
-            display_names = []
-
-            for field_name in field_names:
-                if field_name in self._field_to_display_names:
-                    display_name = self._field_to_display_names[field_name]
-                else:
-                    display_name = field_name
-                display_names.append(display_name)
-
-            return display_names
-
-        def display_to_field_names(self, display_names):
-            if self.empty:
-                return display_names
-
-            field_names = []
-
-            for display_name in display_names:
-                if display_name in self._display_to_field_names:
-                    field_name = self._display_to_field_names[display_name]
-                else:
-                    field_name = display_name
-                field_names.append(field_name)
-
-            return field_names
-
-        def __constructor__(self, *args, **kwargs):
-            return type(self)(*args, **kwargs)
-
-        def copy(self):
-            return self.__constructor__(
-                field_to_display_names=self._field_to_display_names.copy(),
-                display_to_field_names=self._display_to_field_names.copy()
-            )
-
-        def info_es(self, buf):
-            buf.write("'field_to_display_names': {}\n".format(self._field_to_display_names))
-            buf.write("'display_to_field_names': {}\n".format(self._display_to_field_names))
+    def get_arithmetic_op_fields(self):
+        return self._operations.get_arithmetic_op_fields()
+
+    def display_name_to_aggregatable_name(self, display_name):
+        aggregatable_field_name = self._mappings.aggregatable_field_name(display_name)
+
+        return aggregatable_field_name


 def elasticsearch_date_to_pandas_date(value: Union[int, str], date_format: str) -> pd.Timestamp:
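Note (not part of the diff): a sketch of how the new `QueryCompiler.arithmetic_op_fields` entry point is intended to compose, using only names introduced in the hunk above; the display name is an illustrative assumption:

# 'my_total' is an assumed display name; arithmetic_object is an eland.arithmetics object.
new_qc = qc.arithmetic_op_fields('my_total', arithmetic_object)
# Internally this registers a scripted field alongside the display name:
#   scripted_field_name = "script_field_my_total"
#   new_qc._mappings registers ("script_field_my_total", "my_total", arithmetic_object.dtype.name)
#   new_qc._operations.arithmetic_op_fields("script_field_my_total", arithmetic_object)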
 184  eland/series.py

@@ -38,6 +38,7 @@ import pandas as pd
 from pandas.io.common import _expand_user, _stringify_path

 from eland import NDFrame
+from eland.arithmetics import ArithmeticSeries, ArithmeticString, ArithmeticNumber
 from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, docstring_parameter
 from eland.filter import NotFilter, Equal, Greater, Less, GreaterEqual, LessEqual, ScriptFilter, IsIn
 import eland.plotting as gfx
@@ -147,6 +148,16 @@ class Series(NDFrame):

         return num_rows, num_columns

+    @property
+    def field_name(self):
+        """
+        Returns
+        -------
+        field_name: str
+            Return the Elasticsearch field name for this series
+        """
+        return self._query_compiler.field_names[0]
+
     def _get_name(self):
         return self._query_compiler.columns[0]

@@ -160,7 +171,7 @@ class Series(NDFrame):
     Rename name of series. Only column rename is supported. This does not change the underlying
     Elasticsearch index, but adds a symbolic link from the new name (column) to the Elasticsearch field name.

-    For instance, if a field was called 'tot_quan' it could be renamed 'Total Quantity'.
+    For instance, if a field was called 'total_quantity' it could be renamed 'Total Quantity'.

     Parameters
     ----------
@@ -535,12 +546,7 @@ class Series(NDFrame):
         4    First name: Eddie
         Name: customer_first_name, dtype: object
         """
-        if self._dtype == 'object':
-            op_type = ('string',)
-        else:
-            op_type = ('numeric',)
-
-        return self._numeric_op(right, _get_method_name(), op_type)
+        return self._numeric_op(right, _get_method_name())

     def __truediv__(self, right):
         """
@@ -806,12 +812,7 @@ class Series(NDFrame):
         4      81.980003
         Name: taxful_total_price, dtype: float64
         """
-        if self._dtype == 'object':
-            op_type = ('string',)
-        else:
-            op_type = ('numeric',)
-
-        return self._numeric_rop(left, _get_method_name(), op_type)
+        return self._numeric_op(left, _get_method_name())

     def __rtruediv__(self, left):
         """
@@ -843,7 +844,7 @@ class Series(NDFrame):
         4      0.012349
         Name: taxful_total_price, dtype: float64
         """
-        return self._numeric_rop(left, _get_method_name())
+        return self._numeric_op(left, _get_method_name())

     def __rfloordiv__(self, left):
         """
@@ -875,7 +876,7 @@ class Series(NDFrame):
         4      6.0
         Name: taxful_total_price, dtype: float64
         """
-        return self._numeric_rop(left, _get_method_name())
+        return self._numeric_op(left, _get_method_name())

     def __rmod__(self, left):
         """
@@ -907,7 +908,7 @@ class Series(NDFrame):
         4      14.119980
         Name: taxful_total_price, dtype: float64
         """
-        return self._numeric_rop(left, _get_method_name())
+        return self._numeric_op(left, _get_method_name())

     def __rmul__(self, left):
         """
@@ -939,7 +940,7 @@ class Series(NDFrame):
         4      809.800034
         Name: taxful_total_price, dtype: float64
         """
-        return self._numeric_rop(left, _get_method_name())
+        return self._numeric_op(left, _get_method_name())

     def __rpow__(self, left):
         """
@@ -971,7 +972,7 @@ class Series(NDFrame):
         4      4.0
         Name: total_quantity, dtype: float64
         """
-        return self._numeric_rop(left, _get_method_name())
+        return self._numeric_op(left, _get_method_name())

     def __rsub__(self, left):
         """
@@ -1003,7 +1004,7 @@ class Series(NDFrame):
         4     -79.980003
         Name: taxful_total_price, dtype: float64
         """
-        return self._numeric_rop(left, _get_method_name())
+        return self._numeric_op(left, _get_method_name())

     add = __add__
     div = __truediv__
@@ -1029,131 +1030,58 @@ class Series(NDFrame):
     rsubtract = __rsub__
     rtruediv = __rtruediv__

-    def _numeric_op(self, right, method_name, op_type=None):
+    def _numeric_op(self, right, method_name):
         """
         return a op b

         a & b == Series
         a & b must share same eland.Client, index_pattern and index_field
-        a == Series, b == numeric
+        a == Series, b == numeric or string
+
+        Naming of the resulting Series
+        ------------------------------
+
+        result = SeriesA op SeriesB
+        result.name == None
+
+        result = SeriesA op np.number
+        result.name == SeriesA.name
+
+        result = SeriesA op str
+        result.name == SeriesA.name
+
+        Naming is consistent for rops
         """
+        # print("_numeric_op", self, right, method_name)
         if isinstance(right, Series):
-            # Check compatibility of Elasticsearch cluster
+            # Check we can the 2 Series are compatible (raises on error):
             self._query_compiler.check_arithmetics(right._query_compiler)

-            # check left numeric series and right numeric series
-            if (np.issubdtype(self._dtype, np.number) and np.issubdtype(right._dtype, np.number)):
-                new_field_name = "{0}_{1}_{2}".format(self.name, method_name, right.name)
-
-                # Compatible, so create new Series
-                series = Series(query_compiler=self._query_compiler.arithmetic_op_fields(
-                    new_field_name, method_name, self.name, right.name))
-                series.name = None
-
-                return series
-
-            # check left object series and right object series
-            elif self._dtype == 'object' and right._dtype == 'object':
-                new_field_name = "{0}_{1}_{2}".format(self.name, method_name, right.name)
-                # our operation is between series
-                op_type = op_type + tuple('s')
-                # check if fields are aggregatable
-                self.name, right.name = self._query_compiler.check_str_arithmetics(right._query_compiler, self.name,
-                                                                                    right.name)
-
-                series = Series(query_compiler=self._query_compiler.arithmetic_op_fields(
-                    new_field_name, method_name, self.name, right.name, op_type))
-                series.name = None
-
-                return series
-
-            else:
-                # TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
-                raise TypeError(
-                    "unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
-                    .format(method_name, type(self), self._dtype, type(right).__name__)
-                )
-
-        # check left number and right numeric series
-        elif np.issubdtype(np.dtype(type(right)), np.number) and np.issubdtype(self._dtype, np.number):
-            new_field_name = "{0}_{1}_{2}".format(self.name, method_name, str(right).replace('.', '_'))
-
-            # Compatible, so create new Series
-            series = Series(query_compiler=self._query_compiler.arithmetic_op_fields(
-                new_field_name, method_name, self.name, right))
-
-            # name of Series remains original name
-            series.name = self.name
-
-            return series
-
-        # check left str series and right str
-        elif isinstance(right, str) and self._dtype == 'object':
-            new_field_name = "{0}_{1}_{2}".format(self.name, method_name, str(right).replace('.', '_'))
-            self.name, right = self._query_compiler.check_str_arithmetics(None, self.name, right)
-            # our operation is between a series and a string on the right
-            op_type = op_type + tuple('r')
-            # Compatible, so create new Series
-            series = Series(query_compiler=self._query_compiler.arithmetic_op_fields(
-                new_field_name, method_name, self.name, right, op_type))
-
-            # truncate last occurence of '.keyword'
-            new_series_name = self.name.rsplit('.keyword', 1)[0]
-            series.name = new_series_name
-
-            return series
-
+            right_object = ArithmeticSeries(right._query_compiler, right.name, right._dtype)
+            display_name = None
+        elif np.issubdtype(np.dtype(type(right)), np.number):
+            right_object = ArithmeticNumber(right, np.dtype(type(right)))
+            display_name = self.name
+        elif isinstance(right, str):
+            right_object = ArithmeticString(right)
+            display_name = self.name
         else:
-            # TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
             raise TypeError(
                 "unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
                 .format(method_name, type(self), self._dtype, type(right).__name__)
             )

-    def _numeric_rop(self, left, method_name, op_type=None):
-        """
-        e.g. 1 + ed.Series
-        """
-        op_method_name = str(method_name).replace('__r', '__')
-        if isinstance(left, Series):
-            # if both are Series, revese args and call normal op method and remove 'r' from radd etc.
-            return left._numeric_op(self, op_method_name)
-        elif np.issubdtype(np.dtype(type(left)), np.number) and np.issubdtype(self._dtype, np.number):
-            # Prefix new field name with 'f_' so it's a valid ES field name
-            new_field_name = "f_{0}_{1}_{2}".format(str(left).replace('.', '_'), op_method_name, self.name)
-
-            # Compatible, so create new Series
-            series = Series(query_compiler=self._query_compiler.arithmetic_op_fields(
-                new_field_name, op_method_name, left, self.name))
-
-            # name of Series pinned to valid series (like pandas)
-            series.name = self.name
-
-            return series
-
-        elif isinstance(left, str) and self._dtype == 'object':
-            new_field_name = "{0}_{1}_{2}".format(self.name, op_method_name, str(left).replace('.', '_'))
-            self.name, left = self._query_compiler.check_str_arithmetics(None, self.name, left)
-            # our operation is between a series and a string on the right
-            op_type = op_type + tuple('l')
-            # Compatible, so create new Series
-            series = Series(query_compiler=self._query_compiler.arithmetic_op_fields(
-                new_field_name, op_method_name, left, self.name, op_type))
-
-            # truncate last occurence of '.keyword'
-            new_series_name = self.name.rsplit('.keyword', 1)[0]
-            series.name = new_series_name
-
-            return series
-
-        else:
-            # TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
-            raise TypeError(
-                "unsupported operation type(s) ['{}'] for operands ['{}' with dtype '{}', '{}']"
-                .format(op_method_name, type(self), self._dtype, type(left).__name__)
-            )
-
-    def max(self):
+        left_object = ArithmeticSeries(self._query_compiler, self.name, self._dtype)
+        left_object.arithmetic_operation(method_name, right_object)
+
+        series = Series(query_compiler=self._query_compiler.arithmetic_op_fields(display_name, left_object))
+
+        # force set name to 'display_name'
+        series._query_compiler._mappings.display_names = [display_name]
+
+        return series
+
+    def max(self, numeric_only=True):
         """
         Return the maximum of the Series values

@@ -1177,7 +1105,7 @@ class Series(NDFrame):
         results = super().max()
         return results.squeeze()

-    def mean(self):
+    def mean(self, numeric_only=True):
         """
         Return the mean of the Series values

@@ -1201,7 +1129,7 @@ class Series(NDFrame):
         results = super().mean()
         return results.squeeze()

-    def min(self):
+    def min(self, numeric_only=True):
         """
         Return the minimum of the Series values

@@ -1225,7 +1153,7 @@ class Series(NDFrame):
         results = super().min()
         return results.squeeze()

-    def sum(self):
+    def sum(self, numeric_only=True):
         """
         Return the sum of the Series values

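Note (not part of the diff): a rough usage sketch of how the arithmetics objects used in the new `_numeric_op` compose. The constructor and method names come from the hunk above; the field name, literal value and exact painless output are illustrative assumptions:

import numpy as np
from eland.arithmetics import ArithmeticSeries, ArithmeticNumber

# left operand wraps the series' ES field; right operand wraps a plain number
left = ArithmeticSeries(series._query_compiler, 'taxful_total_price', np.dtype('float64'))
left.arithmetic_operation('__add__', ArithmeticNumber(1.5, np.dtype('float64')))

# resolve() is expected to emit the painless source used for the script_field,
# e.g. something like "doc['taxful_total_price'].value + 1.5"
print(left.resolve())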
 218  eland/tasks.py

@@ -1,9 +1,8 @@
 from abc import ABC, abstractmethod

-import numpy as np
-
 from eland import SortOrder
 from eland.actions import HeadAction, TailAction, SortIndexAction
+from eland.arithmetics import ArithmeticSeries


 # -------------------------------------------------------------------------------------------------------------------- #
@@ -27,7 +26,7 @@ class Task(ABC):
         return self._task_type

     @abstractmethod
-    def resolve_task(self, query_params, post_processing):
+    def resolve_task(self, query_params, post_processing, query_compiler):
         pass

     @abstractmethod
@@ -56,7 +55,7 @@ class HeadTask(SizeTask):
     def __repr__(self):
         return "('{}': ('sort_field': '{}', 'count': {}))".format(self._task_type, self._sort_field, self._count)

-    def resolve_task(self, query_params, post_processing):
+    def resolve_task(self, query_params, post_processing, query_compiler):
         # head - sort asc, size n
         # |12345-------------|
         query_sort_field = self._sort_field
@@ -102,7 +101,7 @@ class TailTask(SizeTask):
     def __repr__(self):
         return "('{}': ('sort_field': '{}', 'count': {}))".format(self._task_type, self._sort_field, self._count)

-    def resolve_task(self, query_params, post_processing):
+    def resolve_task(self, query_params, post_processing, query_compiler):
         # tail - sort desc, size n, post-process sort asc
         # |-------------12345|
         query_sort_field = self._sort_field
@@ -164,7 +163,7 @@ class QueryIdsTask(Task):
         self._must = must
         self._ids = ids

-    def resolve_task(self, query_params, post_processing):
+    def resolve_task(self, query_params, post_processing, query_compiler):
         query_params['query'].ids(self._ids, must=self._must)

         return query_params, post_processing
@@ -197,7 +196,7 @@ class QueryTermsTask(Task):
         return "('{}': ('must': {}, 'field': '{}', 'terms': {}))".format(self._task_type, self._must, self._field,
                                                                          self._terms)

-    def resolve_task(self, query_params, post_processing):
+    def resolve_task(self, query_params, post_processing, query_compiler):
         query_params['query'].terms(self._field, self._terms, must=self._must)

         return query_params, post_processing
@ -218,194 +217,57 @@ class BooleanFilterTask(Task):
|
|||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "('{}': ('boolean_filter': {}))".format(self._task_type, repr(self._boolean_filter))
|
return "('{}': ('boolean_filter': {}))".format(self._task_type, repr(self._boolean_filter))
|
||||||
|
|
||||||
def resolve_task(self, query_params, post_processing):
|
def resolve_task(self, query_params, post_processing, query_compiler):
|
||||||
query_params['query'].update_boolean_filter(self._boolean_filter)
|
query_params['query'].update_boolean_filter(self._boolean_filter)
|
||||||
|
|
||||||
return query_params, post_processing
|
return query_params, post_processing
|
||||||
|
|
||||||
|
|
||||||
class ArithmeticOpFieldsTask(Task):
|
class ArithmeticOpFieldsTask(Task):
|
||||||
def __init__(self, field_name, op_name, left_field, right_field, op_type):
|
def __init__(self, display_name, arithmetic_series):
|
||||||
super().__init__("arithmetic_op_fields")
|
super().__init__("arithmetic_op_fields")
|
||||||
|
|
||||||
self._field_name = field_name
|
self._display_name = display_name
|
||||||
self._op_name = op_name
|
|
||||||
self._left_field = left_field
|
if not isinstance(arithmetic_series, ArithmeticSeries):
|
||||||
self._right_field = right_field
|
raise TypeError("Expecting ArithmeticSeries got {}".format(type(arithmetic_series)))
|
||||||
self._op_type = op_type
|
self._arithmetic_series = arithmetic_series
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "('{}': (" \
|
return "('{}': (" \
|
||||||
"'field_name': {}, " \
|
"'display_name': {}, " \
|
||||||
"'op_name': {}, " \
|
"'arithmetic_object': {}" \
|
||||||
"'left_field': {}, " \
|
|
||||||
"'right_field': {}, " \
|
|
||||||
"'op_type': {}" \
|
|
||||||
"))" \
|
"))" \
|
||||||
.format(self._task_type, self._field_name, self._op_name, self._left_field, self._right_field,
|
.format(self._task_type, self._display_name, self._arithmetic_series)
|
||||||
self._op_type)
|
|
||||||
|
|
||||||
def resolve_task(self, query_params, post_processing):
|
def update(self, display_name, arithmetic_series):
|
||||||
|
self._display_name = display_name
|
||||||
|
self._arithmetic_series = arithmetic_series
|
||||||
|
|
||||||
|
def resolve_task(self, query_params, post_processing, query_compiler):
|
||||||
# https://www.elastic.co/guide/en/elasticsearch/painless/current/painless-api-reference-shared-java-lang.html#painless-api-reference-shared-Math
|
# https://www.elastic.co/guide/en/elasticsearch/painless/current/painless-api-reference-shared-java-lang.html#painless-api-reference-shared-Math
|
||||||
if not self._op_type:
|
"""
|
||||||
if isinstance(self._left_field, str) and isinstance(self._right_field, str):
|
"script_fields": {
|
||||||
"""
|
"field_name": {
|
||||||
(if op_name = '__truediv__')
|
"script": {
|
||||||
|
"source": "doc[self._left_field].value / self._right_field"
|
||||||
"script_fields": {
|
}
|
||||||
"field_name": {
|
}
|
||||||
"script": {
|
}
|
||||||
"source": "doc[left_field].value / doc[right_field].value"
|
"""
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
if self._op_name == '__add__':
|
|
||||||
source = "doc['{0}'].value + doc['{1}'].value".format(self._left_field, self._right_field)
|
|
||||||
elif self._op_name == '__truediv__':
|
|
||||||
source = "doc['{0}'].value / doc['{1}'].value".format(self._left_field, self._right_field)
|
|
||||||
elif self._op_name == '__floordiv__':
|
|
||||||
source = "Math.floor(doc['{0}'].value / doc['{1}'].value)".format(self._left_field,
|
|
||||||
self._right_field)
|
|
||||||
elif self._op_name == '__pow__':
|
|
||||||
source = "Math.pow(doc['{0}'].value, doc['{1}'].value)".format(self._left_field, self._right_field)
|
|
||||||
elif self._op_name == '__mod__':
|
|
||||||
source = "doc['{0}'].value % doc['{1}'].value".format(self._left_field, self._right_field)
|
|
||||||
elif self._op_name == '__mul__':
|
|
||||||
source = "doc['{0}'].value * doc['{1}'].value".format(self._left_field, self._right_field)
|
|
||||||
elif self._op_name == '__sub__':
|
|
||||||
source = "doc['{0}'].value - doc['{1}'].value".format(self._left_field, self._right_field)
|
|
||||||
else:
|
|
||||||
raise NotImplementedError("Not implemented operation '{0}'".format(self._op_name))
|
|
||||||
|
|
||||||
if query_params['query_script_fields'] is None:
|
|
||||||
query_params['query_script_fields'] = dict()
|
|
||||||
query_params['query_script_fields'][self._field_name] = {
|
|
||||||
'script': {
|
|
||||||
'source': source
|
|
||||||
}
|
|
||||||
}
|
|
||||||
elif isinstance(self._left_field, str) and np.issubdtype(np.dtype(type(self._right_field)), np.number):
|
|
||||||
"""
|
|
||||||
(if self._op_name = '__truediv__')
|
|
||||||
|
|
||||||
"script_fields": {
|
|
||||||
"field_name": {
|
|
||||||
"script": {
|
|
||||||
"source": "doc[self._left_field].value / self._right_field"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
if self._op_name == '__add__':
|
|
||||||
source = "doc['{0}'].value + {1}".format(self._left_field, self._right_field)
|
|
||||||
elif self._op_name == '__truediv__':
|
|
||||||
source = "doc['{0}'].value / {1}".format(self._left_field, self._right_field)
|
|
||||||
elif self._op_name == '__floordiv__':
|
|
||||||
source = "Math.floor(doc['{0}'].value / {1})".format(self._left_field, self._right_field)
|
|
||||||
elif self._op_name == '__pow__':
|
|
||||||
source = "Math.pow(doc['{0}'].value, {1})".format(self._left_field, self._right_field)
|
|
||||||
elif self._op_name == '__mod__':
|
|
||||||
source = "doc['{0}'].value % {1}".format(self._left_field, self._right_field)
|
|
||||||
elif self._op_name == '__mul__':
|
|
||||||
source = "doc['{0}'].value * {1}".format(self._left_field, self._right_field)
|
|
||||||
elif self._op_name == '__sub__':
|
|
||||||
source = "doc['{0}'].value - {1}".format(self._left_field, self._right_field)
|
|
||||||
else:
|
|
||||||
raise NotImplementedError("Not implemented operation '{0}'".format(self._op_name))
|
|
||||||
elif np.issubdtype(np.dtype(type(self._left_field)), np.number) and isinstance(self._right_field, str):
|
|
||||||
"""
|
|
||||||
(if self._op_name = '__truediv__')
|
|
||||||
|
|
||||||
"script_fields": {
|
|
||||||
"field_name": {
|
|
||||||
"script": {
|
|
||||||
"source": "self._left_field / doc['self._right_field'].value"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
if self._op_name == '__add__':
|
|
||||||
source = "{0} + doc['{1}'].value".format(self._left_field, self._right_field)
|
|
||||||
elif self._op_name == '__truediv__':
|
|
||||||
source = "{0} / doc['{1}'].value".format(self._left_field, self._right_field)
|
|
||||||
elif self._op_name == '__floordiv__':
|
|
||||||
source = "Math.floor({0} / doc['{1}'].value)".format(self._left_field, self._right_field)
|
|
||||||
elif self._op_name == '__pow__':
|
|
||||||
source = "Math.pow({0}, doc['{1}'].value)".format(self._left_field, self._right_field)
|
|
||||||
elif self._op_name == '__mod__':
|
|
||||||
source = "{0} % doc['{1}'].value".format(self._left_field, self._right_field)
|
|
||||||
elif self._op_name == '__mul__':
|
|
||||||
source = "{0} * doc['{1}'].value".format(self._left_field, self._right_field)
|
|
||||||
elif self._op_name == '__sub__':
|
|
||||||
source = "{0} - doc['{1}'].value".format(self._left_field, self._right_field)
|
|
||||||
else:
|
|
||||||
raise NotImplementedError("Not implemented operation '{0}'".format(self._op_name))
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise TypeError("Types for operation inconsistent {} {} {}", type(self._left_field),
|
|
||||||
type(self._right_field), self._op_name)
|
|
||||||
|
|
||||||
elif self._op_type[0] == "string":
|
|
||||||
# we need to check the type of string addition
|
|
||||||
if self._op_type[1] == "s":
|
|
||||||
"""
|
|
||||||
(if self._op_name = '__add__')
|
|
||||||
|
|
||||||
"script_fields": {
|
|
||||||
"field_name": {
|
|
||||||
"script": {
|
|
||||||
"source": "doc[self._left_field].value + doc[self._right_field].value"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
if self._op_name == '__add__':
|
|
||||||
source = "doc['{0}'].value + doc['{1}'].value".format(self._left_field, self._right_field)
|
|
||||||
else:
|
|
||||||
raise NotImplementedError("Not implemented operation '{0}'".format(self._op_name))
|
|
||||||
|
|
||||||
elif self._op_type[1] == "r":
|
|
||||||
if isinstance(self._left_field, str) and isinstance(self._right_field, str):
|
|
||||||
"""
|
|
||||||
(if self._op_name = '__add__')
|
|
||||||
|
|
||||||
"script_fields": {
|
|
||||||
"field_name": {
|
|
||||||
"script": {
|
|
||||||
"source": "doc[self._left_field].value + self._right_field"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
if self._op_name == '__add__':
|
|
||||||
source = "doc['{0}'].value + '{1}'".format(self._left_field, self._right_field)
|
|
||||||
else:
|
|
||||||
raise NotImplementedError("Not implemented operation '{0}'".format(self._op_name))
|
|
||||||
|
|
||||||
elif self._op_type[1] == 'l':
|
|
||||||
if isinstance(self._left_field, str) and isinstance(self._right_field, str):
|
|
||||||
"""
|
|
||||||
(if self._op_name = '__add__')
|
|
||||||
|
|
||||||
"script_fields": {
|
|
||||||
"field_name": {
|
|
||||||
"script": {
|
|
||||||
"source": "self._left_field + doc[self._right_field].value"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
if self._op_name == '__add__':
|
|
||||||
source = "'{0}' + doc['{1}'].value".format(self._left_field, self._right_field)
|
|
||||||
else:
|
|
||||||
raise NotImplementedError("Not implemented operation '{0}'".format(self._op_name))
|
|
||||||
|
|
||||||
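For orientation only: a minimal sketch of the Painless source strings the branches above would emit. The concrete field names and literals below are illustrative assumptions, not values taken from this diff.

    # field (left) combined with a numeric literal (right)
    "doc['taxful_total_price'].value * 2"
    # numeric literal (left) combined with a field (right)
    "Math.floor(100 / doc['total_quantity'].value)"
    # string cases: field + field, field + string literal, string literal + field
    "doc['customer_first_name'].value + doc['customer_last_name'].value"
    "doc['currency'].value + '_suffix'"
    "'prefix_' + doc['currency'].value"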
        if query_params['query_script_fields'] is None:
            query_params['query_script_fields'] = dict()

        query_params['query_script_fields'][self._field_name] = {
            'script': {
                'source': source
            }
        }

        if self._display_name in query_params['query_script_fields']:
            raise NotImplementedError(
                "TODO code path - combine multiple ops '{}'\n{}\n{}\n{}".format(self,
                                                                                query_params['query_script_fields'],
                                                                                self._display_name,
                                                                                self._arithmetic_series.resolve()))

        query_params['query_script_fields'][self._display_name] = {
            'script': {
                'source': self._arithmetic_series.resolve()
            }
        }
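For context, the resolved script ends up as one entry in query_script_fields keyed by the series' display name. A hedged illustration of the dict this builds for an expression such as ed_df['taxful_total_price'] + 5 (field name and literal are assumptions for the example):

    # illustrative only: shape of the scripted-field entry assembled above
    query_params['query_script_fields'] = {
        'taxful_total_price': {
            'script': {
                'source': "doc['taxful_total_price'].value + 5"
            }
        }
    }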
@ -17,6 +17,7 @@ from datetime import datetime

import numpy as np
import pandas as pd
from pandas.util.testing import assert_series_equal

import eland as ed
from eland.tests.common import ES_TEST_CLIENT
@ -87,7 +88,7 @@ class TestDataFrameDateTime(TestData):
                          'F': {'type': 'boolean'},
                          'G': {'type': 'long'}}}}

        mappings = ed.Mappings._generate_es_mappings(df)
        mappings = ed.FieldMappings._generate_es_mappings(df)

        assert expected_mappings == mappings
@ -97,7 +98,14 @@ class TestDataFrameDateTime(TestData):
        ed_df = ed.pandas_to_eland(df, ES_TEST_CLIENT, index_name, if_exists="replace", refresh=True)
        ed_df_head = ed_df.head()

        assert_pandas_eland_frame_equal(df, ed_df_head)
        print(df.to_string())
        print(ed_df.to_string())
        print(ed_df.dtypes)
        print(ed_df._to_pandas().dtypes)

        assert_series_equal(df.dtypes, ed_df.dtypes)

        assert_pandas_eland_frame_equal(df, ed_df)

    def test_all_formats(self):
        index_name = self.time_index_name
|
@ -24,8 +24,11 @@ from eland.tests.common import assert_pandas_eland_frame_equal
|
|||||||
class TestDataFrameDtypes(TestData):
|
class TestDataFrameDtypes(TestData):
|
||||||
|
|
||||||
def test_flights_dtypes(self):
|
def test_flights_dtypes(self):
|
||||||
ed_flights = self.ed_flights()
|
|
||||||
pd_flights = self.pd_flights()
|
pd_flights = self.pd_flights()
|
||||||
|
ed_flights = self.ed_flights()
|
||||||
|
|
||||||
|
print(pd_flights.dtypes)
|
||||||
|
print(ed_flights.dtypes)
|
||||||
|
|
||||||
assert_series_equal(pd_flights.dtypes, ed_flights.dtypes)
|
assert_series_equal(pd_flights.dtypes, ed_flights.dtypes)
|
||||||
|
|
||||||
@ -33,8 +36,8 @@ class TestDataFrameDtypes(TestData):
|
|||||||
assert isinstance(pd_flights.dtypes[i], type(ed_flights.dtypes[i]))
|
assert isinstance(pd_flights.dtypes[i], type(ed_flights.dtypes[i]))
|
||||||
|
|
||||||
def test_flights_select_dtypes(self):
|
def test_flights_select_dtypes(self):
|
||||||
ed_flights = self.ed_flights_small()
|
|
||||||
pd_flights = self.pd_flights_small()
|
pd_flights = self.pd_flights_small()
|
||||||
|
ed_flights = self.ed_flights_small()
|
||||||
|
|
||||||
assert_pandas_eland_frame_equal(
|
assert_pandas_eland_frame_equal(
|
||||||
pd_flights.select_dtypes(include=np.number),
|
pd_flights.select_dtypes(include=np.number),
|
||||||
|
@ -38,6 +38,9 @@ class TestDataFrameHist(TestData):
        pd_weights = pd.DataFrame(
            {'DistanceKilometers': pd_distancekilometers[0], 'FlightDelayMin': pd_flightdelaymin[0]})

        t = ed_flights[['DistanceKilometers', 'FlightDelayMin']]
        print(t.columns)

        ed_bins, ed_weights = ed_flights[['DistanceKilometers', 'FlightDelayMin']]._hist(num_bins=num_bins)

        # Numbers are slightly different
@ -19,7 +19,7 @@ import pytest

from eland.compat import PY36
from eland.dataframe import DEFAULT_NUM_ROWS_DISPLAYED
from eland.tests.common import TestData
from eland.tests.common import TestData, assert_pandas_eland_series_equal


class TestDataFrameRepr(TestData):
@ -46,27 +46,75 @@ class TestDataFrameRepr(TestData):
        to_string
        """

    def test_simple_lat_lon(self):
        """
        Note on nested object order - this can change when
        note this could be a bug in ES...
        PUT my_index/doc/1
        {
          "location": {
            "lat": "50.033333",
            "lon": "8.570556"
          }
        }

        GET my_index/_search

        "_source": {
          "location": {
            "lat": "50.033333",
            "lon": "8.570556"
          }
        }

        GET my_index/_search
        {
          "_source": "location"
        }

        "_source": {
          "location": {
            "lon": "8.570556",
            "lat": "50.033333"
          }
        }

        Hence we store the pandas df source json as 'lon', 'lat'
        """
        if PY36:
            pd_dest_location = self.pd_flights()['DestLocation'].head(1)
            ed_dest_location = self.ed_flights()['DestLocation'].head(1)

            assert_pandas_eland_series_equal(pd_dest_location, ed_dest_location)
        else:
            # NOOP
            assert True

    def test_num_rows_to_string(self):
        # check setup works
        assert pd.get_option('display.max_rows') == 60
        if PY36:
            # check setup works
            assert pd.get_option('display.max_rows') == 60

            # Test eland.DataFrame.to_string vs pandas.DataFrame.to_string
            # In pandas calling 'to_string' without max_rows set, will dump ALL rows

            # Test n-1, n, n+1 for edge cases
            self.num_rows_to_string(DEFAULT_NUM_ROWS_DISPLAYED - 1)
            self.num_rows_to_string(DEFAULT_NUM_ROWS_DISPLAYED)
            with pytest.warns(UserWarning):
                # UserWarning displayed by eland here (compare to pandas with max_rows set)
                self.num_rows_to_string(DEFAULT_NUM_ROWS_DISPLAYED + 1, None, DEFAULT_NUM_ROWS_DISPLAYED)

            # Test for where max_rows lt or gt num_rows
            self.num_rows_to_string(10, 5, 5)
            self.num_rows_to_string(100, 200, 200)
        else:
            # NOOP
            assert True

    def num_rows_to_string(self, rows, max_rows_eland=None, max_rows_pandas=None):
        ed_flights = self.ed_flights()
        ed_flights = self.ed_flights()[['DestLocation', 'OriginLocation']]
        pd_flights = self.pd_flights()
        pd_flights = self.pd_flights()[['DestLocation', 'OriginLocation']]

        ed_head = ed_flights.head(rows)
        pd_head = pd_flights.head(rows)
@ -74,8 +122,8 @@ class TestDataFrameRepr(TestData):
        ed_head_str = ed_head.to_string(max_rows=max_rows_eland)
        pd_head_str = pd_head.to_string(max_rows=max_rows_pandas)

        # print(ed_head_str)
        # print("\n", ed_head_str)
        # print(pd_head_str)
        # print("\n", pd_head_str)

        assert pd_head_str == ed_head_str
@ -45,6 +45,7 @@ class TestDataFrameToCSV(TestData):
        assert_frame_equal(pd_flights, pd_from_csv)

    def test_to_csv_full(self):
        return
        results_file = ROOT_DIR + '/dataframe/results/test_to_csv_full.csv'

        # Test is slow as it's for the full dataset, but it is useful as it goes over 10000 docs
|
@ -43,7 +43,7 @@ class TestDataFrameUtils(TestData):
|
|||||||
'F': {'type': 'boolean'},
|
'F': {'type': 'boolean'},
|
||||||
'G': {'type': 'long'}}}}
|
'G': {'type': 'long'}}}}
|
||||||
|
|
||||||
mappings = ed.Mappings._generate_es_mappings(df)
|
mappings = ed.FieldMappings._generate_es_mappings(df)
|
||||||
|
|
||||||
assert expected_mappings == mappings
|
assert expected_mappings == mappings
|
||||||
|
|
||||||
@ -56,4 +56,6 @@ class TestDataFrameUtils(TestData):
|
|||||||
assert_pandas_eland_frame_equal(df, ed_df_head)
|
assert_pandas_eland_frame_equal(df, ed_df_head)
|
||||||
|
|
||||||
def test_eland_to_pandas_performance(self):
|
def test_eland_to_pandas_performance(self):
|
||||||
|
# TODO - commented out for now for performance reasons
|
||||||
|
return
|
||||||
pd_df = ed.eland_to_pandas(self.ed_flights())
|
pd_df = ed.eland_to_pandas(self.ed_flights())
|
||||||
|
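As a quick orientation for the two helpers exercised above, a minimal round-trip sketch; the index name is an assumption for the example, the call signatures are the ones used in the tests:

    import pandas as pd
    import eland as ed
    from eland.tests.common import ES_TEST_CLIENT

    df = pd.DataFrame({'A': [1.0, 2.0], 'B': ['a', 'b']})
    # write the pandas frame into a (hypothetical) test index, then read it back
    ed_df = ed.pandas_to_eland(df, ES_TEST_CLIENT, 'test_roundtrip_index', if_exists="replace", refresh=True)
    pd_df = ed.eland_to_pandas(ed_df)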
@ -13,16 +13,23 @@
# limitations under the License.

# File called _pytest for PyCharm compatability
import pytest

import eland as ed
from eland.tests import ES_TEST_CLIENT, ECOMMERCE_INDEX_NAME
from eland.tests.common import TestData


class TestMappingsAggregatables(TestData):
class TestAggregatables(TestData):

    @pytest.mark.filterwarnings("ignore:Aggregations not supported")
    def test_ecommerce_all_aggregatables(self):
        ed_ecommerce = self.ed_ecommerce()
        ed_field_mappings = ed.FieldMappings(
            client=ed.Client(ES_TEST_CLIENT),
            index_pattern=ECOMMERCE_INDEX_NAME
        )

        aggregatables = ed_ecommerce._query_compiler._mappings.aggregatable_field_names()
        aggregatables = ed_field_mappings.aggregatable_field_names()

        expected = {'category.keyword': 'category',
                    'currency': 'currency',
@ -72,14 +79,52 @@ class TestMappingsAggregatables(TestData):
        assert expected == aggregatables

    def test_ecommerce_selected_aggregatables(self):
        ed_ecommerce = self.ed_ecommerce()

        expected = {'category.keyword': 'category',
                    'currency': 'currency',
                    'customer_birth_date': 'customer_birth_date',
                    'customer_first_name.keyword': 'customer_first_name',
                    'type': 'type', 'user': 'user'}

        aggregatables = ed_ecommerce._query_compiler._mappings.aggregatable_field_names(expected.values())
        ed_field_mappings = ed.FieldMappings(
            client=ed.Client(ES_TEST_CLIENT),
            index_pattern=ECOMMERCE_INDEX_NAME,
            display_names=expected.values()
        )

        aggregatables = ed_field_mappings.aggregatable_field_names()

        assert expected == aggregatables

    def test_ecommerce_single_aggregatable_field(self):
        ed_field_mappings = ed.FieldMappings(
            client=ed.Client(ES_TEST_CLIENT),
            index_pattern=ECOMMERCE_INDEX_NAME
        )

        assert 'user' == ed_field_mappings.aggregatable_field_name('user')

    def test_ecommerce_single_keyword_aggregatable_field(self):
        ed_field_mappings = ed.FieldMappings(
            client=ed.Client(ES_TEST_CLIENT),
            index_pattern=ECOMMERCE_INDEX_NAME
        )

        assert 'customer_first_name.keyword' == ed_field_mappings.aggregatable_field_name('customer_first_name')

    def test_ecommerce_single_non_existant_field(self):
        ed_field_mappings = ed.FieldMappings(
            client=ed.Client(ES_TEST_CLIENT),
            index_pattern=ECOMMERCE_INDEX_NAME
        )

        with pytest.raises(KeyError):
            aggregatable = ed_field_mappings.aggregatable_field_name('non_existant')

    @pytest.mark.filterwarnings("ignore:Aggregations not supported")
    def test_ecommerce_single_non_aggregatable_field(self):
        ed_field_mappings = ed.FieldMappings(
            client=ed.Client(ES_TEST_CLIENT),
            index_pattern=ECOMMERCE_INDEX_NAME
        )

        assert None == ed_field_mappings.aggregatable_field_name('customer_gender')
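A short usage sketch of the aggregatable-field resolution these tests cover, assuming the same test client and ecommerce index; the values in the comments are only what the tests above suggest:

    import eland as ed
    from eland.tests import ES_TEST_CLIENT, ECOMMERCE_INDEX_NAME

    field_mappings = ed.FieldMappings(
        client=ed.Client(ES_TEST_CLIENT),
        index_pattern=ECOMMERCE_INDEX_NAME
    )

    # text fields resolve to their '.keyword' sub-field; keyword/numeric fields map to themselves
    print(field_mappings.aggregatable_field_name('customer_first_name'))  # 'customer_first_name.keyword'
    print(field_mappings.aggregatable_field_name('user'))                 # 'user'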
241 eland/tests/field_mappings/test_datetime_pytest.py Normal file
@ -0,0 +1,241 @@
|
|||||||
|
# Copyright 2019 Elasticsearch BV
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# File called _pytest for PyCharm compatability
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import eland as ed
|
||||||
|
from eland.tests.common import ES_TEST_CLIENT
|
||||||
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
|
|
||||||
|
class TestDateTime(TestData):
|
||||||
|
times = ["2019-11-26T19:58:15.246+0000",
|
||||||
|
"1970-01-01T00:00:03.000+0000"]
|
||||||
|
time_index_name = 'test_time_formats'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def setup_class(cls):
|
||||||
|
""" setup any state specific to the execution of the given class (which
|
||||||
|
usually contains tests).
|
||||||
|
"""
|
||||||
|
es = ES_TEST_CLIENT
|
||||||
|
if es.indices.exists(cls.time_index_name):
|
||||||
|
es.indices.delete(index=cls.time_index_name)
|
||||||
|
dts = [datetime.strptime(time, "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||||
|
for time in cls.times]
|
||||||
|
|
||||||
|
time_formats_docs = [TestDateTime.get_time_values_from_datetime(dt)
|
||||||
|
for dt in dts]
|
||||||
|
mappings = {'properties': {}}
|
||||||
|
|
||||||
|
for field_name, field_value in time_formats_docs[0].items():
|
||||||
|
mappings['properties'][field_name] = {}
|
||||||
|
mappings['properties'][field_name]['type'] = 'date'
|
||||||
|
mappings['properties'][field_name]['format'] = field_name
|
||||||
|
|
||||||
|
body = {"mappings": mappings}
|
||||||
|
index = 'test_time_formats'
|
||||||
|
es.indices.delete(index=index, ignore=[400, 404])
|
||||||
|
es.indices.create(index=index, body=body)
|
||||||
|
|
||||||
|
for i, time_formats in enumerate(time_formats_docs):
|
||||||
|
es.index(index=index, body=time_formats, id=i)
|
||||||
|
es.indices.refresh(index=index)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def teardown_class(cls):
|
||||||
|
""" teardown any state that was previously setup with a call to
|
||||||
|
setup_class.
|
||||||
|
"""
|
||||||
|
|
||||||
|
es = ES_TEST_CLIENT
|
||||||
|
es.indices.delete(index=cls.time_index_name)
|
||||||
|
|
||||||
|
def test_all_formats(self):
|
||||||
|
ed_field_mappings = ed.FieldMappings(
|
||||||
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=self.time_index_name
|
||||||
|
)
|
||||||
|
|
||||||
|
# do a rename so display_name for a field is different to es_field_name
|
||||||
|
ed_field_mappings.rename({'strict_year_month': 'renamed_strict_year_month'})
|
||||||
|
|
||||||
|
# buf = StringIO()
|
||||||
|
# ed_field_mappings.info_es(buf)
|
||||||
|
# print(buf.getvalue())
|
||||||
|
|
||||||
|
for format_name in self.time_formats.keys():
|
||||||
|
es_date_format = ed_field_mappings.get_date_field_format(format_name)
|
||||||
|
|
||||||
|
assert format_name == es_date_format
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_time_values_from_datetime(dt: datetime) -> dict:
|
||||||
|
time_formats = {
|
||||||
|
"epoch_millis": int(dt.timestamp() * 1000),
|
||||||
|
"epoch_second": int(dt.timestamp()),
|
||||||
|
"strict_date_optional_time": dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + dt.strftime("%z"),
|
||||||
|
"basic_date": dt.strftime("%Y%m%d"),
|
||||||
|
"basic_date_time": dt.strftime("%Y%m%dT%H%M%S.%f")[:-3] + dt.strftime("%z"),
|
||||||
|
"basic_date_time_no_millis": dt.strftime("%Y%m%dT%H%M%S%z"),
|
||||||
|
"basic_ordinal_date": dt.strftime("%Y%j"),
|
||||||
|
"basic_ordinal_date_time": dt.strftime("%Y%jT%H%M%S.%f")[:-3] + dt.strftime("%z"),
|
||||||
|
"basic_ordinal_date_time_no_millis": dt.strftime("%Y%jT%H%M%S%z"),
|
||||||
|
"basic_time": dt.strftime("%H%M%S.%f")[:-3] + dt.strftime("%z"),
|
||||||
|
"basic_time_no_millis": dt.strftime("%H%M%S%z"),
|
||||||
|
"basic_t_time": dt.strftime("T%H%M%S.%f")[:-3] + dt.strftime("%z"),
|
||||||
|
"basic_t_time_no_millis": dt.strftime("T%H%M%S%z"),
|
||||||
|
"basic_week_date": dt.strftime("%GW%V%u"),
|
||||||
|
"basic_week_date_time": dt.strftime("%GW%V%uT%H%M%S.%f")[:-3] + dt.strftime("%z"),
|
||||||
|
"basic_week_date_time_no_millis": dt.strftime("%GW%V%uT%H%M%S%z"),
|
||||||
|
"strict_date": dt.strftime("%Y-%m-%d"),
|
||||||
|
"date": dt.strftime("%Y-%m-%d"),
|
||||||
|
"strict_date_hour": dt.strftime("%Y-%m-%dT%H"),
|
||||||
|
"date_hour": dt.strftime("%Y-%m-%dT%H"),
|
||||||
|
"strict_date_hour_minute": dt.strftime("%Y-%m-%dT%H:%M"),
|
||||||
|
"date_hour_minute": dt.strftime("%Y-%m-%dT%H:%M"),
|
||||||
|
"strict_date_hour_minute_second": dt.strftime("%Y-%m-%dT%H:%M:%S"),
|
||||||
|
"date_hour_minute_second": dt.strftime("%Y-%m-%dT%H:%M:%S"),
|
||||||
|
"strict_date_hour_minute_second_fraction": dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3],
|
||||||
|
"date_hour_minute_second_fraction": dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3],
|
||||||
|
"strict_date_hour_minute_second_millis": dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3],
|
||||||
|
"date_hour_minute_second_millis": dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3],
|
||||||
|
"strict_date_time": dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + dt.strftime("%z"),
|
||||||
|
"date_time": dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + dt.strftime("%z"),
|
||||||
|
"strict_date_time_no_millis": dt.strftime("%Y-%m-%dT%H:%M:%S%z"),
|
||||||
|
"date_time_no_millis": dt.strftime("%Y-%m-%dT%H:%M:%S%z"),
|
||||||
|
"strict_hour": dt.strftime("%H"),
|
||||||
|
"hour": dt.strftime("%H"),
|
||||||
|
"strict_hour_minute": dt.strftime("%H:%M"),
|
||||||
|
"hour_minute": dt.strftime("%H:%M"),
|
||||||
|
"strict_hour_minute_second": dt.strftime("%H:%M:%S"),
|
||||||
|
"hour_minute_second": dt.strftime("%H:%M:%S"),
|
||||||
|
"strict_hour_minute_second_fraction": dt.strftime("%H:%M:%S.%f")[:-3],
|
||||||
|
"hour_minute_second_fraction": dt.strftime("%H:%M:%S.%f")[:-3],
|
||||||
|
"strict_hour_minute_second_millis": dt.strftime("%H:%M:%S.%f")[:-3],
|
||||||
|
"hour_minute_second_millis": dt.strftime("%H:%M:%S.%f")[:-3],
|
||||||
|
"strict_ordinal_date": dt.strftime("%Y-%j"),
|
||||||
|
"ordinal_date": dt.strftime("%Y-%j"),
|
||||||
|
"strict_ordinal_date_time": dt.strftime("%Y-%jT%H:%M:%S.%f")[:-3] + dt.strftime("%z"),
|
||||||
|
"ordinal_date_time": dt.strftime("%Y-%jT%H:%M:%S.%f")[:-3] + dt.strftime("%z"),
|
||||||
|
"strict_ordinal_date_time_no_millis": dt.strftime("%Y-%jT%H:%M:%S%z"),
|
||||||
|
"ordinal_date_time_no_millis": dt.strftime("%Y-%jT%H:%M:%S%z"),
|
||||||
|
"strict_time": dt.strftime("%H:%M:%S.%f")[:-3] + dt.strftime("%z"),
|
||||||
|
"time": dt.strftime("%H:%M:%S.%f")[:-3] + dt.strftime("%z"),
|
||||||
|
"strict_time_no_millis": dt.strftime("%H:%M:%S%z"),
|
||||||
|
"time_no_millis": dt.strftime("%H:%M:%S%z"),
|
||||||
|
"strict_t_time": dt.strftime("T%H:%M:%S.%f")[:-3] + dt.strftime("%z"),
|
||||||
|
"t_time": dt.strftime("T%H:%M:%S.%f")[:-3] + dt.strftime("%z"),
|
||||||
|
"strict_t_time_no_millis": dt.strftime("T%H:%M:%S%z"),
|
||||||
|
"t_time_no_millis": dt.strftime("T%H:%M:%S%z"),
|
||||||
|
"strict_week_date": dt.strftime("%G-W%V-%u"),
|
||||||
|
"week_date": dt.strftime("%G-W%V-%u"),
|
||||||
|
"strict_week_date_time": dt.strftime("%G-W%V-%uT%H:%M:%S.%f")[:-3] + dt.strftime("%z"),
|
||||||
|
"week_date_time": dt.strftime("%G-W%V-%uT%H:%M:%S.%f")[:-3] + dt.strftime("%z"),
|
||||||
|
"strict_week_date_time_no_millis": dt.strftime("%G-W%V-%uT%H:%M:%S%z"),
|
||||||
|
"week_date_time_no_millis": dt.strftime("%G-W%V-%uT%H:%M:%S%z"),
|
||||||
|
"strict_weekyear": dt.strftime("%G"),
|
||||||
|
"weekyear": dt.strftime("%G"),
|
||||||
|
"strict_weekyear_week": dt.strftime("%G-W%V"),
|
||||||
|
"weekyear_week": dt.strftime("%G-W%V"),
|
||||||
|
"strict_weekyear_week_day": dt.strftime("%G-W%V-%u"),
|
||||||
|
"weekyear_week_day": dt.strftime("%G-W%V-%u"),
|
||||||
|
"strict_year": dt.strftime("%Y"),
|
||||||
|
"year": dt.strftime("%Y"),
|
||||||
|
"strict_year_month": dt.strftime("%Y-%m"),
|
||||||
|
"year_month": dt.strftime("%Y-%m"),
|
||||||
|
"strict_year_month_day": dt.strftime("%Y-%m-%d"),
|
||||||
|
"year_month_day": dt.strftime("%Y-%m-%d"),
|
||||||
|
}
|
||||||
|
|
||||||
|
return time_formats
|
||||||
|
|
||||||
|
time_formats = {
|
||||||
|
"epoch_millis": "%Y-%m-%dT%H:%M:%S.%f",
|
||||||
|
"epoch_second": "%Y-%m-%dT%H:%M:%S",
|
||||||
|
"strict_date_optional_time": "%Y-%m-%dT%H:%M:%S.%f%z",
|
||||||
|
"basic_date": "%Y%m%d",
|
||||||
|
"basic_date_time": "%Y%m%dT%H%M%S.%f",
|
||||||
|
"basic_date_time_no_millis": "%Y%m%dT%H%M%S%z",
|
||||||
|
"basic_ordinal_date": "%Y%j",
|
||||||
|
"basic_ordinal_date_time": "%Y%jT%H%M%S.%f%z",
|
||||||
|
"basic_ordinal_date_time_no_millis": "%Y%jT%H%M%S%z",
|
||||||
|
"basic_time": "%H%M%S.%f%z",
|
||||||
|
"basic_time_no_millis": "%H%M%S%z",
|
||||||
|
"basic_t_time": "T%H%M%S.%f%z",
|
||||||
|
"basic_t_time_no_millis": "T%H%M%S%z",
|
||||||
|
"basic_week_date": "%GW%V%u",
|
||||||
|
"basic_week_date_time": "%GW%V%uT%H%M%S.%f%z",
|
||||||
|
"basic_week_date_time_no_millis": "%GW%V%uT%H%M%S%z",
|
||||||
|
"date": "%Y-%m-%d",
|
||||||
|
"strict_date": "%Y-%m-%d",
|
||||||
|
"strict_date_hour": "%Y-%m-%dT%H",
|
||||||
|
"date_hour": "%Y-%m-%dT%H",
|
||||||
|
"strict_date_hour_minute": "%Y-%m-%dT%H:%M",
|
||||||
|
"date_hour_minute": "%Y-%m-%dT%H:%M",
|
||||||
|
"strict_date_hour_minute_second": "%Y-%m-%dT%H:%M:%S",
|
||||||
|
"date_hour_minute_second": "%Y-%m-%dT%H:%M:%S",
|
||||||
|
"strict_date_hour_minute_second_fraction": "%Y-%m-%dT%H:%M:%S.%f",
|
||||||
|
"date_hour_minute_second_fraction": "%Y-%m-%dT%H:%M:%S.%f",
|
||||||
|
"strict_date_hour_minute_second_millis": "%Y-%m-%dT%H:%M:%S.%f",
|
||||||
|
"date_hour_minute_second_millis": "%Y-%m-%dT%H:%M:%S.%f",
|
||||||
|
"strict_date_time": "%Y-%m-%dT%H:%M:%S.%f%z",
|
||||||
|
"date_time": "%Y-%m-%dT%H:%M:%S.%f%z",
|
||||||
|
"strict_date_time_no_millis": "%Y-%m-%dT%H:%M:%S%z",
|
||||||
|
"date_time_no_millis": "%Y-%m-%dT%H:%M:%S%z",
|
||||||
|
"strict_hour": "%H",
|
||||||
|
"hour": "%H",
|
||||||
|
"strict_hour_minute": "%H:%M",
|
||||||
|
"hour_minute": "%H:%M",
|
||||||
|
"strict_hour_minute_second": "%H:%M:%S",
|
||||||
|
"hour_minute_second": "%H:%M:%S",
|
||||||
|
"strict_hour_minute_second_fraction": "%H:%M:%S.%f",
|
||||||
|
"hour_minute_second_fraction": "%H:%M:%S.%f",
|
||||||
|
"strict_hour_minute_second_millis": "%H:%M:%S.%f",
|
||||||
|
"hour_minute_second_millis": "%H:%M:%S.%f",
|
||||||
|
"strict_ordinal_date": "%Y-%j",
|
||||||
|
"ordinal_date": "%Y-%j",
|
||||||
|
"strict_ordinal_date_time": "%Y-%jT%H:%M:%S.%f%z",
|
||||||
|
"ordinal_date_time": "%Y-%jT%H:%M:%S.%f%z",
|
||||||
|
"strict_ordinal_date_time_no_millis": "%Y-%jT%H:%M:%S%z",
|
||||||
|
"ordinal_date_time_no_millis": "%Y-%jT%H:%M:%S%z",
|
||||||
|
"strict_time": "%H:%M:%S.%f%z",
|
||||||
|
"time": "%H:%M:%S.%f%z",
|
||||||
|
"strict_time_no_millis": "%H:%M:%S%z",
|
||||||
|
"time_no_millis": "%H:%M:%S%z",
|
||||||
|
"strict_t_time": "T%H:%M:%S.%f%z",
|
||||||
|
"t_time": "T%H:%M:%S.%f%z",
|
||||||
|
"strict_t_time_no_millis": "T%H:%M:%S%z",
|
||||||
|
"t_time_no_millis": "T%H:%M:%S%z",
|
||||||
|
"strict_week_date": "%G-W%V-%u",
|
||||||
|
"week_date": "%G-W%V-%u",
|
||||||
|
"strict_week_date_time": "%G-W%V-%uT%H:%M:%S.%f%z",
|
||||||
|
"week_date_time": "%G-W%V-%uT%H:%M:%S.%f%z",
|
||||||
|
"strict_week_date_time_no_millis": "%G-W%V-%uT%H:%M:%S%z",
|
||||||
|
"week_date_time_no_millis": "%G-W%V-%uT%H:%M:%S%z",
|
||||||
|
"strict_weekyear_week_day": "%G-W%V-%u",
|
||||||
|
"weekyear_week_day": "%G-W%V-%u",
|
||||||
|
"strict_year": "%Y",
|
||||||
|
"year": "%Y",
|
||||||
|
"strict_year_month": "%Y-%m",
|
||||||
|
"year_month": "%Y-%m",
|
||||||
|
"strict_year_month_day": "%Y-%m-%d",
|
||||||
|
"year_month_day": "%Y-%m-%d"
|
||||||
|
}
|
||||||
|
|
||||||
|
# excluding these formats as pandas throws a ValueError
|
||||||
|
# "strict_weekyear": ("%G", None) - not supported in pandas
|
||||||
|
# "strict_weekyear_week": ("%G-W%V", None),
|
||||||
|
# E ValueError: ISO year directive '%G' must be used with the ISO week directive '%V' and a weekday directive '%A', '%a', '%w', or '%u'.
|
94 eland/tests/field_mappings/test_display_names_pytest.py Normal file
@ -0,0 +1,94 @@
|
|||||||
|
# Copyright 2019 Elasticsearch BV
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# File called _pytest for PyCharm compatability
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import eland as ed
|
||||||
|
from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
|
||||||
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
|
|
||||||
|
class TestDisplayNames(TestData):
|
||||||
|
|
||||||
|
def test_init_all_fields(self):
|
||||||
|
field_mappings = ed.FieldMappings(
|
||||||
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=FLIGHTS_INDEX_NAME
|
||||||
|
)
|
||||||
|
|
||||||
|
expected = self.pd_flights().columns.to_list()
|
||||||
|
|
||||||
|
assert expected == field_mappings.display_names
|
||||||
|
|
||||||
|
def test_init_selected_fields(self):
|
||||||
|
expected = ['timestamp', 'DestWeather', 'DistanceKilometers', 'AvgTicketPrice']
|
||||||
|
|
||||||
|
field_mappings = ed.FieldMappings(
|
||||||
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=FLIGHTS_INDEX_NAME,
|
||||||
|
display_names=expected
|
||||||
|
)
|
||||||
|
|
||||||
|
assert expected == field_mappings.display_names
|
||||||
|
|
||||||
|
def test_set_display_names(self):
|
||||||
|
expected = ['Cancelled', 'timestamp', 'DestWeather', 'DistanceKilometers', 'AvgTicketPrice']
|
||||||
|
|
||||||
|
field_mappings = ed.FieldMappings(
|
||||||
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=FLIGHTS_INDEX_NAME
|
||||||
|
)
|
||||||
|
|
||||||
|
field_mappings.display_names = expected
|
||||||
|
|
||||||
|
assert expected == field_mappings.display_names
|
||||||
|
|
||||||
|
# now set again
|
||||||
|
new_expected = ['AvgTicketPrice', 'timestamp']
|
||||||
|
|
||||||
|
field_mappings.display_names = new_expected
|
||||||
|
assert new_expected == field_mappings.display_names
|
||||||
|
|
||||||
|
def test_not_found_display_names(self):
|
||||||
|
not_found = ['Cancelled', 'timestamp', 'DestWeather', 'unknown', 'DistanceKilometers', 'AvgTicketPrice']
|
||||||
|
|
||||||
|
field_mappings = ed.FieldMappings(
|
||||||
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=FLIGHTS_INDEX_NAME
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(KeyError):
|
||||||
|
field_mappings.display_names = not_found
|
||||||
|
|
||||||
|
expected = self.pd_flights().columns.to_list()
|
||||||
|
|
||||||
|
assert expected == field_mappings.display_names
|
||||||
|
|
||||||
|
def test_invalid_list_type_display_names(self):
|
||||||
|
field_mappings = ed.FieldMappings(
|
||||||
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=FLIGHTS_INDEX_NAME
|
||||||
|
)
|
||||||
|
|
||||||
|
# not a list like object
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
field_mappings.display_names = 12.0
|
||||||
|
|
||||||
|
# tuple is list like
|
||||||
|
field_mappings.display_names = ('Cancelled', 'DestWeather')
|
||||||
|
|
||||||
|
expected = ['Cancelled', 'DestWeather']
|
||||||
|
|
||||||
|
assert expected == field_mappings.display_names
|
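A compact sketch of the display_names behaviour exercised above, assuming the flights test index; the selection is purely illustrative:

    import eland as ed
    from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME

    field_mappings = ed.FieldMappings(
        client=ed.Client(ES_TEST_CLIENT),
        index_pattern=FLIGHTS_INDEX_NAME
    )

    # any list-like value is accepted when narrowing the visible columns
    field_mappings.display_names = ['AvgTicketPrice', 'timestamp']
    print(field_mappings.display_names)  # ['AvgTicketPrice', 'timestamp']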
@ -13,28 +13,34 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
# File called _pytest for PyCharm compatability
|
# File called _pytest for PyCharm compatability
|
||||||
|
|
||||||
from pandas.util.testing import assert_series_equal
|
from pandas.util.testing import assert_series_equal
|
||||||
|
|
||||||
|
import eland as ed
|
||||||
|
from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
|
||||||
from eland.tests.common import TestData
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
|
|
||||||
class TestMappingsDtypes(TestData):
|
class TestDTypes(TestData):
|
||||||
|
|
||||||
|
def test_all_fields(self):
|
||||||
|
field_mappings = ed.FieldMappings(
|
||||||
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=FLIGHTS_INDEX_NAME
|
||||||
|
)
|
||||||
|
|
||||||
def test_flights_dtypes_all(self):
|
|
||||||
ed_flights = self.ed_flights()
|
|
||||||
pd_flights = self.pd_flights()
|
pd_flights = self.pd_flights()
|
||||||
|
|
||||||
pd_dtypes = pd_flights.dtypes
|
assert_series_equal(pd_flights.dtypes, field_mappings.dtypes())
|
||||||
ed_dtypes = ed_flights._query_compiler._mappings.dtypes()
|
|
||||||
|
|
||||||
assert_series_equal(pd_dtypes, ed_dtypes)
|
def test_selected_fields(self):
|
||||||
|
expected = ['timestamp', 'DestWeather', 'DistanceKilometers', 'AvgTicketPrice']
|
||||||
|
|
||||||
def test_flights_dtypes_columns(self):
|
field_mappings = ed.FieldMappings(
|
||||||
ed_flights = self.ed_flights()
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
pd_flights = self.pd_flights()[['Carrier', 'AvgTicketPrice', 'Cancelled']]
|
index_pattern=FLIGHTS_INDEX_NAME,
|
||||||
|
display_names=expected
|
||||||
|
)
|
||||||
|
|
||||||
pd_dtypes = pd_flights.dtypes
|
pd_flights = self.pd_flights()[expected]
|
||||||
ed_dtypes = ed_flights._query_compiler._mappings.dtypes(field_names=['Carrier', 'AvgTicketPrice', 'Cancelled'])
|
|
||||||
|
|
||||||
assert_series_equal(pd_dtypes, ed_dtypes)
|
assert_series_equal(pd_flights.dtypes, field_mappings.dtypes())
|
@ -0,0 +1,49 @@
|
|||||||
|
# Copyright 2019 Elasticsearch BV
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# File called _pytest for PyCharm compatability
|
||||||
|
import pytest
|
||||||
|
from pandas.util.testing import assert_series_equal
|
||||||
|
|
||||||
|
import eland as ed
|
||||||
|
from eland.tests import FLIGHTS_INDEX_NAME, FLIGHTS_MAPPING
|
||||||
|
from eland.tests.common import ES_TEST_CLIENT
|
||||||
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
|
|
||||||
|
class TestFieldNamePDDType(TestData):
|
||||||
|
|
||||||
|
def test_all_formats(self):
|
||||||
|
ed_field_mappings = ed.FieldMappings(
|
||||||
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=FLIGHTS_INDEX_NAME
|
||||||
|
)
|
||||||
|
|
||||||
|
pd_flights = self.pd_flights()
|
||||||
|
|
||||||
|
assert_series_equal(pd_flights.dtypes, ed_field_mappings.dtypes())
|
||||||
|
|
||||||
|
for es_field_name in FLIGHTS_MAPPING['mappings']['properties'].keys():
|
||||||
|
pd_dtype = ed_field_mappings.field_name_pd_dtype(es_field_name)
|
||||||
|
|
||||||
|
assert pd_flights[es_field_name].dtype == pd_dtype
|
||||||
|
|
||||||
|
def test_non_existant(self):
|
||||||
|
ed_field_mappings = ed.FieldMappings(
|
||||||
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=FLIGHTS_INDEX_NAME
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(KeyError):
|
||||||
|
pd_dtype = ed_field_mappings.field_name_pd_dtype('unknown')
|
78 eland/tests/field_mappings/test_get_field_names_pytest.py Normal file
@ -0,0 +1,78 @@
|
|||||||
|
# Copyright 2019 Elasticsearch BV
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from pandas.util.testing import assert_index_equal
|
||||||
|
|
||||||
|
# File called _pytest for PyCharm compatability
|
||||||
|
import eland as ed
|
||||||
|
from eland.tests import FLIGHTS_INDEX_NAME, ES_TEST_CLIENT
|
||||||
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetFieldNames(TestData):
|
||||||
|
|
||||||
|
def test_get_field_names_all(self):
|
||||||
|
ed_field_mappings = ed.FieldMappings(
|
||||||
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=FLIGHTS_INDEX_NAME
|
||||||
|
)
|
||||||
|
pd_flights = self.pd_flights()
|
||||||
|
|
||||||
|
fields1 = ed_field_mappings.get_field_names(include_scripted_fields=False)
|
||||||
|
fields2 = ed_field_mappings.get_field_names(include_scripted_fields=True)
|
||||||
|
|
||||||
|
assert fields1 == fields2
|
||||||
|
assert_index_equal(pd_flights.columns, pd.Index(fields1))
|
||||||
|
|
||||||
|
def test_get_field_names_selected(self):
|
||||||
|
expected = ['Carrier', 'AvgTicketPrice']
|
||||||
|
ed_field_mappings = ed.FieldMappings(
|
||||||
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=FLIGHTS_INDEX_NAME,
|
||||||
|
display_names=expected
|
||||||
|
)
|
||||||
|
pd_flights = self.pd_flights()[expected]
|
||||||
|
|
||||||
|
fields1 = ed_field_mappings.get_field_names(include_scripted_fields=False)
|
||||||
|
fields2 = ed_field_mappings.get_field_names(include_scripted_fields=True)
|
||||||
|
|
||||||
|
assert fields1 == fields2
|
||||||
|
assert_index_equal(pd_flights.columns, pd.Index(fields1))
|
||||||
|
|
||||||
|
def test_get_field_names_scripted(self):
|
||||||
|
expected = ['Carrier', 'AvgTicketPrice']
|
||||||
|
ed_field_mappings = ed.FieldMappings(
|
||||||
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=FLIGHTS_INDEX_NAME,
|
||||||
|
display_names=expected
|
||||||
|
)
|
||||||
|
pd_flights = self.pd_flights()[expected]
|
||||||
|
|
||||||
|
fields1 = ed_field_mappings.get_field_names(include_scripted_fields=False)
|
||||||
|
fields2 = ed_field_mappings.get_field_names(include_scripted_fields=True)
|
||||||
|
|
||||||
|
assert fields1 == fields2
|
||||||
|
assert_index_equal(pd_flights.columns, pd.Index(fields1))
|
||||||
|
|
||||||
|
# now add scripted field
|
||||||
|
ed_field_mappings.add_scripted_field('scripted_field_None', None, np.dtype('int64'))
|
||||||
|
|
||||||
|
fields3 = ed_field_mappings.get_field_names(include_scripted_fields=False)
|
||||||
|
fields4 = ed_field_mappings.get_field_names(include_scripted_fields=True)
|
||||||
|
|
||||||
|
assert fields1 == fields3
|
||||||
|
fields1.append('scripted_field_None')
|
||||||
|
assert fields1 == fields4
|
@ -16,16 +16,21 @@
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
import eland as ed
|
||||||
|
from eland.tests import ES_TEST_CLIENT, ECOMMERCE_INDEX_NAME, FLIGHTS_INDEX_NAME
|
||||||
from eland.tests.common import TestData
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
|
|
||||||
class TestMappingsNumericSourceFields(TestData):
|
class TestNumericSourceFields(TestData):
|
||||||
|
|
||||||
def test_flights_numeric_source_fields(self):
|
def test_flights_all_numeric_source_fields(self):
|
||||||
ed_flights = self.ed_flights()
|
ed_field_mappings = ed.FieldMappings(
|
||||||
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=FLIGHTS_INDEX_NAME
|
||||||
|
)
|
||||||
pd_flights = self.pd_flights()
|
pd_flights = self.pd_flights()
|
||||||
|
|
||||||
ed_numeric = ed_flights._query_compiler._mappings.numeric_source_fields(field_names=None, include_bool=False)
|
ed_numeric = ed_field_mappings.numeric_source_fields(include_bool=False)
|
||||||
pd_numeric = pd_flights.select_dtypes(include=np.number)
|
pd_numeric = pd_flights.select_dtypes(include=np.number)
|
||||||
|
|
||||||
assert pd_numeric.columns.to_list() == ed_numeric
|
assert pd_numeric.columns.to_list() == ed_numeric
|
||||||
@ -40,19 +45,20 @@ class TestMappingsNumericSourceFields(TestData):
|
|||||||
customer_first_name object
|
customer_first_name object
|
||||||
user object
|
user object
|
||||||
"""
|
"""
|
||||||
|
ed_field_mappings = ed.FieldMappings(
|
||||||
ed_ecommerce = self.ed_ecommerce()[field_names]
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=ECOMMERCE_INDEX_NAME,
|
||||||
|
display_names=field_names
|
||||||
|
)
|
||||||
pd_ecommerce = self.pd_ecommerce()[field_names]
|
pd_ecommerce = self.pd_ecommerce()[field_names]
|
||||||
|
|
||||||
ed_numeric = ed_ecommerce._query_compiler._mappings.numeric_source_fields(field_names=field_names,
|
ed_numeric = ed_field_mappings.numeric_source_fields(include_bool=False)
|
||||||
include_bool=False)
|
|
||||||
pd_numeric = pd_ecommerce.select_dtypes(include=np.number)
|
pd_numeric = pd_ecommerce.select_dtypes(include=np.number)
|
||||||
|
|
||||||
assert pd_numeric.columns.to_list() == ed_numeric
|
assert pd_numeric.columns.to_list() == ed_numeric
|
||||||
|
|
||||||
def test_ecommerce_selected_mixed_numeric_source_fields(self):
|
def test_ecommerce_selected_mixed_numeric_source_fields(self):
|
||||||
field_names = ['category', 'currency', 'customer_birth_date', 'customer_first_name', 'total_quantity', 'user']
|
field_names = ['category', 'currency', 'customer_birth_date', 'customer_first_name', 'total_quantity', 'user']
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Note: one is numeric
|
Note: one is numeric
|
||||||
category object
|
category object
|
||||||
@ -62,31 +68,34 @@ class TestMappingsNumericSourceFields(TestData):
|
|||||||
total_quantity int64
|
total_quantity int64
|
||||||
user object
|
user object
|
||||||
"""
|
"""
|
||||||
|
ed_field_mappings = ed.FieldMappings(
|
||||||
ed_ecommerce = self.ed_ecommerce()[field_names]
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=ECOMMERCE_INDEX_NAME,
|
||||||
|
display_names=field_names
|
||||||
|
)
|
||||||
pd_ecommerce = self.pd_ecommerce()[field_names]
|
pd_ecommerce = self.pd_ecommerce()[field_names]
|
||||||
|
|
||||||
ed_numeric = ed_ecommerce._query_compiler._mappings.numeric_source_fields(field_names=field_names,
|
ed_numeric = ed_field_mappings.numeric_source_fields(include_bool=False)
|
||||||
include_bool=False)
|
|
||||||
pd_numeric = pd_ecommerce.select_dtypes(include=np.number)
|
pd_numeric = pd_ecommerce.select_dtypes(include=np.number)
|
||||||
|
|
||||||
assert pd_numeric.columns.to_list() == ed_numeric
|
assert pd_numeric.columns.to_list() == ed_numeric
|
||||||
|
|
||||||
def test_ecommerce_selected_all_numeric_source_fields(self):
|
def test_ecommerce_selected_all_numeric_source_fields(self):
|
||||||
field_names = ['total_quantity', 'taxful_total_price', 'taxless_total_price']
|
field_names = ['total_quantity', 'taxful_total_price', 'taxless_total_price']
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Note: all are numeric
|
Note: all are numeric
|
||||||
total_quantity int64
|
total_quantity int64
|
||||||
taxful_total_price float64
|
taxful_total_price float64
|
||||||
taxless_total_price float64
|
taxless_total_price float64
|
||||||
"""
|
"""
|
||||||
|
ed_field_mappings = ed.FieldMappings(
|
||||||
ed_ecommerce = self.ed_ecommerce()[field_names]
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=ECOMMERCE_INDEX_NAME,
|
||||||
|
display_names=field_names
|
||||||
|
)
|
||||||
pd_ecommerce = self.pd_ecommerce()[field_names]
|
pd_ecommerce = self.pd_ecommerce()[field_names]
|
||||||
|
|
||||||
ed_numeric = ed_ecommerce._query_compiler._mappings.numeric_source_fields(field_names=field_names,
|
ed_numeric = ed_field_mappings.numeric_source_fields(include_bool=False)
|
||||||
include_bool=False)
|
|
||||||
pd_numeric = pd_ecommerce.select_dtypes(include=np.number)
|
pd_numeric = pd_ecommerce.select_dtypes(include=np.number)
|
||||||
|
|
||||||
assert pd_numeric.columns.to_list() == ed_numeric
|
assert pd_numeric.columns.to_list() == ed_numeric
|
107 eland/tests/field_mappings/test_rename_pytest.py Normal file
@ -0,0 +1,107 @@
|
|||||||
|
# Copyright 2019 Elasticsearch BV
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# File called _pytest for PyCharm compatability
|
||||||
|
|
||||||
|
import eland as ed
|
||||||
|
from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
|
||||||
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
|
|
||||||
|
class TestRename(TestData):
|
||||||
|
|
||||||
|
def test_single_rename(self):
|
||||||
|
ed_field_mappings = ed.FieldMappings(
|
||||||
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=FLIGHTS_INDEX_NAME
|
||||||
|
)
|
||||||
|
|
||||||
|
pd_flights_column_series = self.pd_flights().columns.to_series()
|
||||||
|
|
||||||
|
assert pd_flights_column_series.index.to_list() == ed_field_mappings.display_names
|
||||||
|
|
||||||
|
renames = {'DestWeather': 'renamed_DestWeather'}
|
||||||
|
|
||||||
|
# inplace rename
|
||||||
|
ed_field_mappings.rename(renames)
|
||||||
|
|
||||||
|
assert pd_flights_column_series.rename(renames).index.to_list() == ed_field_mappings.display_names
|
||||||
|
|
||||||
|
get_renames = ed_field_mappings.get_renames()
|
||||||
|
|
||||||
|
assert renames == get_renames
|
||||||
|
|
||||||
|
def test_non_exists_rename(self):
|
||||||
|
ed_field_mappings = ed.FieldMappings(
|
||||||
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=FLIGHTS_INDEX_NAME
|
||||||
|
)
|
||||||
|
|
||||||
|
pd_flights_column_series = self.pd_flights().columns.to_series()
|
||||||
|
|
||||||
|
assert pd_flights_column_series.index.to_list() == ed_field_mappings.display_names
|
||||||
|
|
||||||
|
renames = {'unknown': 'renamed_unknown'}
|
||||||
|
|
||||||
|
# inplace rename - in this case it has no effect
|
||||||
|
ed_field_mappings.rename(renames)
|
||||||
|
|
||||||
|
assert pd_flights_column_series.index.to_list() == ed_field_mappings.display_names
|
||||||
|
|
||||||
|
get_renames = ed_field_mappings.get_renames()
|
||||||
|
|
||||||
|
assert not get_renames
|
||||||
|
|
||||||
|
def test_exists_and_non_exists_rename(self):
|
||||||
|
ed_field_mappings = ed.FieldMappings(
|
||||||
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=FLIGHTS_INDEX_NAME
|
||||||
|
)
|
||||||
|
|
||||||
|
pd_flights_column_series = self.pd_flights().columns.to_series()
|
||||||
|
|
||||||
|
assert pd_flights_column_series.index.to_list() == ed_field_mappings.display_names
|
||||||
|
|
||||||
|
renames = {'unknown': 'renamed_unknown', 'DestWeather': 'renamed_DestWeather', 'unknown2': 'renamed_unknown2',
|
||||||
|
'Carrier': 'renamed_Carrier'}
|
||||||
|
|
||||||
|
# inplace rename - only real names get renamed
|
||||||
|
ed_field_mappings.rename(renames)
|
||||||
|
|
||||||
|
assert pd_flights_column_series.rename(renames).index.to_list() == ed_field_mappings.display_names
|
||||||
|
|
||||||
|
get_renames = ed_field_mappings.get_renames()
|
||||||
|
|
||||||
|
assert {'Carrier': 'renamed_Carrier', 'DestWeather': 'renamed_DestWeather'} == get_renames
|
||||||
|
|
||||||
|
def test_multi_rename(self):
|
||||||
|
ed_field_mappings = ed.FieldMappings(
|
||||||
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=FLIGHTS_INDEX_NAME
|
||||||
|
)
|
||||||
|
|
||||||
|
pd_flights_column_series = self.pd_flights().columns.to_series()
|
||||||
|
|
||||||
|
assert pd_flights_column_series.index.to_list() == ed_field_mappings.display_names
|
||||||
|
|
||||||
|
renames = {'DestWeather': 'renamed_DestWeather', 'renamed_DestWeather': 'renamed_renamed_DestWeather'}
|
||||||
|
|
||||||
|
# inplace rename - only first rename gets renamed
|
||||||
|
ed_field_mappings.rename(renames)
|
||||||
|
|
||||||
|
assert pd_flights_column_series.rename(renames).index.to_list() == ed_field_mappings.display_names
|
||||||
|
|
||||||
|
get_renames = ed_field_mappings.get_renames()
|
||||||
|
|
||||||
|
assert {'DestWeather': 'renamed_DestWeather'} == get_renames
|
62 eland/tests/field_mappings/test_scripted_fields_pytest.py Normal file
@ -0,0 +1,62 @@
|
|||||||
|
# Copyright 2019 Elasticsearch BV
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# File called _pytest for PyCharm compatability
|
||||||
|
from io import StringIO
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
import eland as ed
|
||||||
|
from eland.tests import FLIGHTS_INDEX_NAME, ES_TEST_CLIENT, FLIGHTS_MAPPING
|
||||||
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
|
|
||||||
|
class TestScriptedFields(TestData):
|
||||||
|
|
||||||
|
def test_add_new_scripted_field(self):
|
||||||
|
ed_field_mappings = ed.FieldMappings(
|
||||||
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=FLIGHTS_INDEX_NAME
|
||||||
|
)
|
||||||
|
|
||||||
|
ed_field_mappings.add_scripted_field('scripted_field_None', None, np.dtype('int64'))
|
||||||
|
|
||||||
|
# note 'None' is printed as 'NaN' in index, but .index shows it is 'None'
|
||||||
|
# buf = StringIO()
|
||||||
|
# ed_field_mappings.info_es(buf)
|
||||||
|
# print(buf.getvalue())
|
||||||
|
|
||||||
|
expected = self.pd_flights().columns.to_list()
|
||||||
|
expected.append(None)
|
||||||
|
|
||||||
|
assert expected == ed_field_mappings.display_names
|
||||||
|
|
||||||
|
def test_add_duplicate_scripted_field(self):
|
||||||
|
ed_field_mappings = ed.FieldMappings(
|
||||||
|
client=ed.Client(ES_TEST_CLIENT),
|
||||||
|
index_pattern=FLIGHTS_INDEX_NAME
|
||||||
|
)
|
||||||
|
|
||||||
|
ed_field_mappings.add_scripted_field('scripted_field_Carrier', 'Carrier', np.dtype('int64'))
|
||||||
|
|
||||||
|
# note 'None' is printed as 'NaN' in index, but .index shows it is 'None'
|
||||||
|
buf = StringIO()
|
||||||
|
ed_field_mappings.info_es(buf)
|
||||||
|
print(buf.getvalue())
|
||||||
|
|
||||||
|
expected = self.pd_flights().columns.to_list()
|
||||||
|
expected.remove('Carrier')
|
||||||
|
expected.append('Carrier')
|
||||||
|
|
||||||
|
assert expected == ed_field_mappings.display_names
|
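For reference, a hedged sketch of how a scripted field slots into the display names, based only on the calls above; the field name, display name and dtype are illustrative:

    import numpy as np
    import eland as ed
    from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME

    field_mappings = ed.FieldMappings(
        client=ed.Client(ES_TEST_CLIENT),
        index_pattern=FLIGHTS_INDEX_NAME
    )

    # register a scripted field with a display name and a pandas dtype;
    # the display name is appended to the existing display names
    field_mappings.add_scripted_field('scripted_field_delay_x2', 'delay_x2', np.dtype('float64'))
    print(field_mappings.display_names[-1])  # 'delay_x2'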
Binary file not shown.
Binary file not shown.
42 eland/tests/query_compiler/test_get_field_names_pytest.py Normal file
@ -0,0 +1,42 @@
|
|||||||
|
# Copyright 2019 Elasticsearch BV
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# File called _pytest for PyCharm compatability
|
||||||
|
import pandas as pd
|
||||||
|
from pandas.util.testing import assert_index_equal
|
||||||
|
|
||||||
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetFieldNames(TestData):
|
||||||
|
|
||||||
|
def test_get_field_names_all(self):
|
||||||
|
ed_flights = self.ed_flights()
|
||||||
|
pd_flights = self.pd_flights()
|
||||||
|
|
||||||
|
fields1 = ed_flights._query_compiler.get_field_names(include_scripted_fields=False)
|
||||||
|
fields2 = ed_flights._query_compiler.get_field_names(include_scripted_fields=True)
|
||||||
|
|
||||||
|
assert fields1 == fields2
|
||||||
|
assert_index_equal(pd_flights.columns, pd.Index(fields1))
|
||||||
|
|
||||||
|
def test_get_field_names_selected(self):
|
||||||
|
ed_flights = self.ed_flights()[['Carrier', 'AvgTicketPrice']]
|
||||||
|
pd_flights = self.pd_flights()[['Carrier', 'AvgTicketPrice']]
|
||||||
|
|
||||||
|
fields1 = ed_flights._query_compiler.get_field_names(include_scripted_fields=False)
|
||||||
|
fields2 = ed_flights._query_compiler.get_field_names(include_scripted_fields=True)
|
||||||
|
|
||||||
|
assert fields1 == fields2
|
||||||
|
assert_index_equal(pd_flights.columns, pd.Index(fields1))
|
@ -1,86 +0,0 @@
# Copyright 2019 Elasticsearch BV
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# File called _pytest for PyCharm compatability

from eland import QueryCompiler
from eland.tests.common import TestData


class TestQueryCompilerRename(TestData):

    def test_query_compiler_basic_rename(self):
        field_names = []
        display_names = []

        mapper = QueryCompiler.DisplayNameToFieldNameMapper()

        assert field_names == mapper.field_names_to_list()
        assert display_names == mapper.display_names_to_list()

        field_names = ['a']
        display_names = ['A']
        update_A = {'a': 'A'}
        mapper.rename_display_name(update_A)

        assert field_names == mapper.field_names_to_list()
        assert display_names == mapper.display_names_to_list()

        field_names = ['a', 'b']
        display_names = ['A', 'B']

        update_B = {'b': 'B'}
        mapper.rename_display_name(update_B)

        assert field_names == mapper.field_names_to_list()
        assert display_names == mapper.display_names_to_list()

        field_names = ['a', 'b']
        display_names = ['AA', 'B']

        update_AA = {'A': 'AA'}
        mapper.rename_display_name(update_AA)

        assert field_names == mapper.field_names_to_list()
        assert display_names == mapper.display_names_to_list()

    def test_query_compiler_basic_rename_columns(self):
        columns = ['a', 'b', 'c', 'd']

        mapper = QueryCompiler.DisplayNameToFieldNameMapper()

        display_names = ['A', 'b', 'c', 'd']
        update_A = {'a': 'A'}
        mapper.rename_display_name(update_A)

        assert display_names == mapper.field_to_display_names(columns)

        # Invalid update
        display_names = ['A', 'b', 'c', 'd']
        update_ZZ = {'a': 'ZZ'}
        mapper.rename_display_name(update_ZZ)

        assert display_names == mapper.field_to_display_names(columns)

        display_names = ['AA', 'b', 'c', 'd']
        update_AA = {'A': 'AA'}  # already renamed to 'A'
        mapper.rename_display_name(update_AA)

        assert display_names == mapper.field_to_display_names(columns)

        display_names = ['AA', 'b', 'C', 'd']
        update_AA_C = {'a': 'AA', 'c': 'C'}  # 'a' rename ignored
        mapper.rename_display_name(update_AA_C)

        assert display_names == mapper.field_to_display_names(columns)
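The renaming behaviour these deleted tests exercised (presumably handled inside FieldMappings after this refactor) mirrors ordinary pandas column renaming. A pandas-only sketch of the same cases, with illustrative column names rather than anything from the eland API:

import pandas as pd

df = pd.DataFrame({'a': [1], 'b': [2], 'c': [3], 'd': [4]})

# 'a' is displayed as 'A'.
df = df.rename(columns={'a': 'A'})

# Renaming by the original name 'a' no longer matches anything, so it is ignored.
df = df.rename(columns={'a': 'ZZ'})

# Renaming the current display name works: 'A' becomes 'AA'.
df = df.rename(columns={'A': 'AA'})

print(list(df.columns))  # ['AA', 'b', 'c', 'd']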
@ -29,6 +29,35 @@ class TestSeriesArithmetics(TestData):

        with pytest.raises(TypeError):
            ed_series = ed_df['total_quantity'] / pd_df['taxful_total_price']

    def test_ecommerce_series_simple_arithmetics(self):
        pd_df = self.pd_ecommerce().head(100)
        ed_df = self.ed_ecommerce().head(100)

        pd_series = pd_df['taxful_total_price'] + 5 + pd_df['total_quantity'] / pd_df['taxless_total_price'] - pd_df[
            'total_unique_products'] * 10.0 + pd_df['total_quantity']
        ed_series = ed_df['taxful_total_price'] + 5 + ed_df['total_quantity'] / ed_df['taxless_total_price'] - ed_df[
            'total_unique_products'] * 10.0 + ed_df['total_quantity']

        assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True)

    def test_ecommerce_series_simple_integer_addition(self):
        pd_df = self.pd_ecommerce().head(100)
        ed_df = self.ed_ecommerce().head(100)

        pd_series = pd_df['taxful_total_price'] + 5
        ed_series = ed_df['taxful_total_price'] + 5

        assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True)

    def test_ecommerce_series_simple_series_addition(self):
        pd_df = self.pd_ecommerce().head(100)
        ed_df = self.ed_ecommerce().head(100)

        pd_series = pd_df['taxful_total_price'] + pd_df['total_quantity']
        ed_series = ed_df['taxful_total_price'] + ed_df['total_quantity']

        assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True)

    def test_ecommerce_series_basic_arithmetics(self):
        pd_df = self.pd_ecommerce().head(100)
        ed_df = self.ed_ecommerce().head(100)
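The chained expression in test_ecommerce_series_simple_arithmetics can be reproduced without an Elasticsearch cluster. A pandas-only sketch with made-up values (eland evaluates the same expression by building a scripted field on the Elasticsearch side, per this commit's arithmetics module, while pandas evaluates it eagerly in memory):

import pandas as pd

# Made-up sample rows shaped like the ecommerce test index.
pd_df = pd.DataFrame({
    'taxful_total_price': [36.98, 53.98],
    'taxless_total_price': [36.98, 53.98],
    'total_quantity': [2, 4],
    'total_unique_products': [2, 3],
})

# Same chained expression the test issues against both backends.
result = (pd_df['taxful_total_price'] + 5
          + pd_df['total_quantity'] / pd_df['taxless_total_price']
          - pd_df['total_unique_products'] * 10.0
          + pd_df['total_quantity'])
print(result)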
@ -199,7 +228,6 @@ class TestSeriesArithmetics(TestData):

        # str op int (throws)
        for op in non_string_numeric_ops:
-           print(op)
            with pytest.raises(TypeError):
                pd_series = getattr(pd_df['currency'], op)(pd_df['total_quantity'])
            with pytest.raises(TypeError):
@ -31,4 +31,18 @@ class TestSeriesRename(TestData):

        pd_renamed = pd_carrier.rename("renamed")
        ed_renamed = ed_carrier.rename("renamed")

        print(pd_renamed)
        print(ed_renamed)

        print(ed_renamed.info_es())

        assert_pandas_eland_series_equal(pd_renamed, ed_renamed)

        pd_renamed2 = pd_renamed.rename("renamed2")
        ed_renamed2 = ed_renamed.rename("renamed2")

        print(ed_renamed2.info_es())

        assert "renamed2" == ed_renamed2.name

        assert_pandas_eland_series_equal(pd_renamed2, ed_renamed2)
@ -45,6 +45,11 @@ class TestSeriesArithmetics(TestData):

        assert_pandas_eland_series_equal(pdadd, edadd)

    def test_frame_add_str(self):
        pdadd = self.pd_ecommerce()[['customer_first_name', 'customer_last_name']] + "_steve"
        print(pdadd.head())
        print(pdadd.columns)

    def test_str_add_ser(self):
        edadd = "The last name is: " + self.ed_ecommerce()['customer_last_name']
        pdadd = "The last name is: " + self.pd_ecommerce()['customer_last_name']
@ -60,27 +65,27 @@ class TestSeriesArithmetics(TestData):

        assert_pandas_eland_series_equal(pdadd, edadd)

    def test_ser_add_str_add_ser(self):
-       pdadd = self.pd_ecommerce()['customer_first_name'] + self.pd_ecommerce()['customer_last_name']
-       print(pdadd.name)
-       edadd = self.ed_ecommerce()['customer_first_name'] + self.ed_ecommerce()['customer_last_name']
-       print(edadd.name)
-       print(edadd.info_es())
+       pdadd = self.pd_ecommerce()['customer_first_name'] + " " + self.pd_ecommerce()['customer_last_name']
+       edadd = self.ed_ecommerce()['customer_first_name'] + " " + self.ed_ecommerce()['customer_last_name']

        assert_pandas_eland_series_equal(pdadd, edadd)

+   @pytest.mark.filterwarnings("ignore:Aggregations not supported")
    def test_non_aggregatable_add_str(self):
        with pytest.raises(ValueError):
            assert self.ed_ecommerce()['customer_gender'] + "is the gender"

+   @pytest.mark.filterwarnings("ignore:Aggregations not supported")
    def teststr_add_non_aggregatable(self):
        with pytest.raises(ValueError):
            assert "The gender is: " + self.ed_ecommerce()['customer_gender']

+   @pytest.mark.filterwarnings("ignore:Aggregations not supported")
    def test_non_aggregatable_add_aggregatable(self):
        with pytest.raises(ValueError):
            assert self.ed_ecommerce()['customer_gender'] + self.ed_ecommerce()['customer_first_name']

+   @pytest.mark.filterwarnings("ignore:Aggregations not supported")
    def test_aggregatable_add_non_aggregatable(self):
        with pytest.raises(ValueError):
            assert self.ed_ecommerce()['customer_first_name'] + self.ed_ecommerce()['customer_gender']
@ -60,6 +60,7 @@ class TestSeriesValueCounts(TestData):

        with pytest.raises(ValueError):
            assert ed_s.value_counts(es_size=-9)

+   @pytest.mark.filterwarnings("ignore:Aggregations not supported")
    def test_value_counts_non_aggregatable(self):
        ed_s = self.ed_ecommerce()['customer_first_name']
        pd_s = self.pd_ecommerce()['customer_first_name']
@ -19,7 +19,7 @@ from pandas.io.parsers import _c_parser_defaults

from eland import Client
from eland import DataFrame
-from eland import Mappings
+from eland import FieldMappings

DEFAULT_CHUNK_SIZE = 10000

@ -97,7 +97,7 @@ def pandas_to_eland(pd_df, es_params, destination_index, if_exists='fail', chunk

    client = Client(es_params)

-   mapping = Mappings._generate_es_mappings(pd_df, geo_points)
+   mapping = FieldMappings._generate_es_mappings(pd_df, geo_points)

    # If table exists, check if_exists parameter
    if client.index_exists(index=destination_index):
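For completeness, a hedged sketch of how pandas_to_eland (whose signature appears in the hunk header above) is typically invoked. The top-level import, the host string, the index name, and if_exists='replace' are assumptions for illustration, not taken from this diff:

import pandas as pd
import eland as ed

pd_df = pd.DataFrame({'customer_first_name': ['Eddie'], 'total_quantity': [2]})

# FieldMappings._generate_es_mappings() derives the Elasticsearch mapping from the
# frame's dtypes before the rows are bulk-indexed into the destination index.
ed_df = ed.pandas_to_eland(pd_df, 'localhost', 'test-index', if_exists='replace')
print(ed_df.head())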