From dff49d01fe5a5a1b68579c2c2d3fe0a34bad9e1a Mon Sep 17 00:00:00 2001 From: Stephen Dodson Date: Wed, 13 Nov 2019 18:23:43 +0000 Subject: [PATCH 1/2] More doc updates. --- docs/source/conf.py | 16 +- .../reference/api/eland.DataFrame.agg.rst | 6 + .../api/eland.DataFrame.aggregate.rst | 6 + .../reference/api/eland.DataFrame.count.rst | 6 + .../api/eland.DataFrame.describe.rst | 6 + .../reference/api/eland.DataFrame.drop.rst | 6 + .../reference/api/eland.DataFrame.dtypes.rst | 6 + .../reference/api/eland.DataFrame.empty.rst | 6 + .../reference/api/eland.DataFrame.get.rst | 6 + .../reference/api/eland.DataFrame.hist.rst | 6 + .../reference/api/eland.DataFrame.info.rst | 6 + .../api/eland.DataFrame.select_dtypes.rst | 6 + docs/source/reference/api/eland.Index.rst | 6 + docs/source/reference/dataframe.rst | 41 ++++ docs/source/reference/index.rst | 1 + docs/source/reference/indexing.rst | 15 ++ eland/conftest.py | 17 ++ eland/dataframe.py | 194 ++++++++++++++---- eland/index.py | 34 ++- eland/mappings.py | 28 +-- eland/ndframe.py | 147 +++++++++++-- eland/plotting.py | 36 ++-- eland/query_compiler.py | 5 - eland/series.py | 2 +- eland/tests/dataframe/test_count_pytest.py | 1 + eland/tests/mappings/test_dtypes_pytest.py | 19 -- eland/utils.py | 34 +++ 27 files changed, 518 insertions(+), 144 deletions(-) create mode 100644 docs/source/reference/api/eland.DataFrame.agg.rst create mode 100644 docs/source/reference/api/eland.DataFrame.aggregate.rst create mode 100644 docs/source/reference/api/eland.DataFrame.count.rst create mode 100644 docs/source/reference/api/eland.DataFrame.describe.rst create mode 100644 docs/source/reference/api/eland.DataFrame.drop.rst create mode 100644 docs/source/reference/api/eland.DataFrame.dtypes.rst create mode 100644 docs/source/reference/api/eland.DataFrame.empty.rst create mode 100644 docs/source/reference/api/eland.DataFrame.get.rst create mode 100644 docs/source/reference/api/eland.DataFrame.hist.rst create mode 100644 
docs/source/reference/api/eland.DataFrame.info.rst create mode 100644 docs/source/reference/api/eland.DataFrame.select_dtypes.rst create mode 100644 docs/source/reference/api/eland.Index.rst create mode 100644 docs/source/reference/indexing.rst create mode 100644 eland/conftest.py diff --git a/docs/source/conf.py b/docs/source/conf.py index f37c4d8..9eba27d 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -40,7 +40,10 @@ release = '0.1' extensions = [ 'sphinx.ext.autodoc', "sphinx.ext.doctest", - 'numpydoc' + "sphinx.ext.extlinks", + 'numpydoc', + "matplotlib.sphinxext.plot_directive", + "sphinx.ext.todo", ] doctest_global_setup = ''' @@ -54,7 +57,18 @@ except ImportError: pd = None ''' +extlinks = {'pandas_docs': ('https://pandas.pydata.org/pandas-docs/version/0.25.1/reference/api/%s.html', '')} + numpydoc_attributes_as_param_list = False +numpydoc_show_class_members = False + +# matplotlib plot directive +plot_include_source = True +plot_formats = [("png", 90)] +plot_html_show_formats = False +plot_html_show_source_link = False +plot_pre_code = """import numpy as np +import eland as ed""" # Add any paths that contain templates here, relative to this directory. diff --git a/docs/source/reference/api/eland.DataFrame.agg.rst b/docs/source/reference/api/eland.DataFrame.agg.rst new file mode 100644 index 0000000..ef8c092 --- /dev/null +++ b/docs/source/reference/api/eland.DataFrame.agg.rst @@ -0,0 +1,6 @@ +eland.DataFrame.agg +=================== + +.. currentmodule:: eland + +.. automethod:: DataFrame.agg diff --git a/docs/source/reference/api/eland.DataFrame.aggregate.rst b/docs/source/reference/api/eland.DataFrame.aggregate.rst new file mode 100644 index 0000000..2e3468f --- /dev/null +++ b/docs/source/reference/api/eland.DataFrame.aggregate.rst @@ -0,0 +1,6 @@ +eland.DataFrame.aggregate +========================= + +.. currentmodule:: eland + +.. 
automethod:: DataFrame.aggregate diff --git a/docs/source/reference/api/eland.DataFrame.count.rst b/docs/source/reference/api/eland.DataFrame.count.rst new file mode 100644 index 0000000..a2d74fd --- /dev/null +++ b/docs/source/reference/api/eland.DataFrame.count.rst @@ -0,0 +1,6 @@ +eland.DataFrame.count +===================== + +.. currentmodule:: eland + +.. automethod:: DataFrame.count diff --git a/docs/source/reference/api/eland.DataFrame.describe.rst b/docs/source/reference/api/eland.DataFrame.describe.rst new file mode 100644 index 0000000..41a5d0c --- /dev/null +++ b/docs/source/reference/api/eland.DataFrame.describe.rst @@ -0,0 +1,6 @@ +eland.DataFrame.describe +======================== + +.. currentmodule:: eland + +.. automethod:: DataFrame.describe diff --git a/docs/source/reference/api/eland.DataFrame.drop.rst b/docs/source/reference/api/eland.DataFrame.drop.rst new file mode 100644 index 0000000..a01d5ce --- /dev/null +++ b/docs/source/reference/api/eland.DataFrame.drop.rst @@ -0,0 +1,6 @@ +eland.DataFrame.drop +==================== + +.. currentmodule:: eland + +.. automethod:: DataFrame.drop diff --git a/docs/source/reference/api/eland.DataFrame.dtypes.rst b/docs/source/reference/api/eland.DataFrame.dtypes.rst new file mode 100644 index 0000000..6ec2883 --- /dev/null +++ b/docs/source/reference/api/eland.DataFrame.dtypes.rst @@ -0,0 +1,6 @@ +eland.DataFrame.dtypes +====================== + +.. currentmodule:: eland + +.. autoattribute:: DataFrame.dtypes diff --git a/docs/source/reference/api/eland.DataFrame.empty.rst b/docs/source/reference/api/eland.DataFrame.empty.rst new file mode 100644 index 0000000..e693934 --- /dev/null +++ b/docs/source/reference/api/eland.DataFrame.empty.rst @@ -0,0 +1,6 @@ +eland.DataFrame.empty +===================== + +.. currentmodule:: eland + +.. 
autoattribute:: DataFrame.empty diff --git a/docs/source/reference/api/eland.DataFrame.get.rst b/docs/source/reference/api/eland.DataFrame.get.rst new file mode 100644 index 0000000..dc069ad --- /dev/null +++ b/docs/source/reference/api/eland.DataFrame.get.rst @@ -0,0 +1,6 @@ +eland.DataFrame.get +=================== + +.. currentmodule:: eland + +.. automethod:: DataFrame.get diff --git a/docs/source/reference/api/eland.DataFrame.hist.rst b/docs/source/reference/api/eland.DataFrame.hist.rst new file mode 100644 index 0000000..73c478c --- /dev/null +++ b/docs/source/reference/api/eland.DataFrame.hist.rst @@ -0,0 +1,6 @@ +eland.DataFrame.hist +==================== + +.. currentmodule:: eland + +.. automethod:: DataFrame.hist diff --git a/docs/source/reference/api/eland.DataFrame.info.rst b/docs/source/reference/api/eland.DataFrame.info.rst new file mode 100644 index 0000000..452adf2 --- /dev/null +++ b/docs/source/reference/api/eland.DataFrame.info.rst @@ -0,0 +1,6 @@ +eland.DataFrame.info +==================== + +.. currentmodule:: eland + +.. automethod:: DataFrame.info diff --git a/docs/source/reference/api/eland.DataFrame.select_dtypes.rst b/docs/source/reference/api/eland.DataFrame.select_dtypes.rst new file mode 100644 index 0000000..3a8272b --- /dev/null +++ b/docs/source/reference/api/eland.DataFrame.select_dtypes.rst @@ -0,0 +1,6 @@ +eland.DataFrame.select_dtypes +============================= + +.. currentmodule:: eland + +.. automethod:: DataFrame.select_dtypes diff --git a/docs/source/reference/api/eland.Index.rst b/docs/source/reference/api/eland.Index.rst new file mode 100644 index 0000000..20c53d8 --- /dev/null +++ b/docs/source/reference/api/eland.Index.rst @@ -0,0 +1,6 @@ +eland.Index +=========== + +.. currentmodule:: eland + +.. 
autoclass:: Index diff --git a/docs/source/reference/dataframe.rst b/docs/source/reference/dataframe.rst index f4510b3..aaed5a3 100644 --- a/docs/source/reference/dataframe.rst +++ b/docs/source/reference/dataframe.rst @@ -21,6 +21,9 @@ Attributes and underlying data DataFrame.index DataFrame.columns + DataFrame.dtypes + DataFrame.select_dtypes + DataFrame.empty Indexing, iteration ~~~~~~~~~~~~~~~~~~~ @@ -29,7 +32,45 @@ Indexing, iteration DataFrame.head DataFrame.tail + DataFrame.get +Function application, GroupBy & window +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + DataFrame.agg + DataFrame.aggregate +.. _api.dataframe.stats: + +Computations / descriptive stats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.count + DataFrame.describe + DataFrame.info + +Reindexing / selection / label manipulation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.drop + +Plotting +~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.hist + +Serialization / IO / conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.info diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst index 8f79abe..a623800 100644 --- a/docs/source/reference/index.rst +++ b/docs/source/reference/index.rst @@ -12,3 +12,4 @@ methods. All classes and functions exposed in ``eland.*`` namespace are public. general_utility_functions dataframe + indexing diff --git a/docs/source/reference/indexing.rst b/docs/source/reference/indexing.rst new file mode 100644 index 0000000..1824209 --- /dev/null +++ b/docs/source/reference/indexing.rst @@ -0,0 +1,15 @@ +.. _api.index: + +===== +Index +===== +.. currentmodule:: eland + +**Many of these methods or variants thereof are available on the objects +that contain an index (Series/DataFrame) and those should most likely be +used before calling these methods directly.** + +.. 
autosummary:: + :toctree: api/ + + Index diff --git a/eland/conftest.py b/eland/conftest.py new file mode 100644 index 0000000..ce62d3b --- /dev/null +++ b/eland/conftest.py @@ -0,0 +1,17 @@ +import pytest + +import numpy as np +import pandas as pd +import eland as ed + +# Fix console size for consistent test results +pd.set_option('display.max_rows', 10) +pd.set_option('display.max_columns', 5) +pd.set_option('display.width', 100) + +@pytest.fixture(autouse=True) +def add_imports(doctest_namespace): + doctest_namespace["np"] = np + doctest_namespace["pd"] = pd + doctest_namespace["ed"] = ed + diff --git a/eland/dataframe.py b/eland/dataframe.py index 516391a..bf476a9 100644 --- a/eland/dataframe.py +++ b/eland/dataframe.py @@ -8,7 +8,6 @@ import six from pandas.core.common import apply_if_callable, is_bool_indexer from pandas.core.dtypes.common import is_list_like from pandas.core.indexing import check_bool_indexer - from pandas.io.common import _expand_user, _stringify_path from pandas.io.formats import console from pandas.io.formats import format as fmt @@ -19,6 +18,7 @@ from eland import NDFrame from eland import Series from eland.filter import BooleanFilter, ScriptFilter + class DataFrame(NDFrame): """ Two-dimensional size-mutable, potentially heterogeneous tabular data structure with labeled axes @@ -39,21 +39,26 @@ class DataFrame(NDFrame): index_field: str, optional The Elasticsearch index field to use as the DataFrame index. Defaults to _id if None is used. + See Also + -------- + :pandas_docs:`pandas.DataFrame` + Examples -------- Constructing DataFrame from an Elasticsearch configuration arguments and an Elasticsearch index >>> df = ed.DataFrame('localhost:9200', 'flights') >>> df.head() - AvgTicketPrice Cancelled Carrier Dest ... OriginRegion OriginWeather dayOfWeek timestamp - 0 841.265642 False Kibana Airlines Sydney Kingsford Smith International Airport ... 
DE-HE Sunny 0 2018-01-01 00:00:00 - 1 882.982662 False Logstash Airways Venice Marco Polo Airport ... SE-BD Clear 0 2018-01-01 18:27:00 - 2 190.636904 False Logstash Airways Venice Marco Polo Airport ... IT-34 Rain 0 2018-01-01 17:11:14 - 3 181.694216 True Kibana Airlines Treviso-Sant'Angelo Airport ... IT-72 Thunder & Lightning 0 2018-01-01 10:33:28 - 4 730.041778 False Kibana Airlines Xi'an Xianyang International Airport ... MX-DIF Damaging Wind 0 2018-01-01 05:13:00 + AvgTicketPrice Cancelled ... dayOfWeek timestamp + 0 841.265642 False ... 0 2018-01-01 00:00:00 + 1 882.982662 False ... 0 2018-01-01 18:27:00 + 2 190.636904 False ... 0 2018-01-01 17:11:14 + 3 181.694216 True ... 0 2018-01-01 10:33:28 + 4 730.041778 False ... 0 2018-01-01 05:13:00 [5 rows x 27 columns] + Constructing DataFrame from an Elasticsearch client and an Elasticsearch index >>> from elasticsearch import Elasticsearch @@ -82,6 +87,7 @@ class DataFrame(NDFrame): [5 rows x 2 columns] """ + def __init__(self, client=None, index_pattern=None, @@ -115,19 +121,22 @@ class DataFrame(NDFrame): ------- Elasticsearch field names as pandas.Index + See Also + -------- + :pandas_docs:`pandas.DataFrame.columns` + Examples -------- >>> df = ed.DataFrame('localhost', 'flights') >>> assert isinstance(df.columns, pd.Index) >>> df.columns - Index(['AvgTicketPrice', 'Cancelled', 'Carrier', 'Dest', 'DestAirportID', - ... 'DestCityName', 'DestCountry', 'DestLocation', 'DestRegion', - ... 'DestWeather', 'DistanceKilometers', 'DistanceMiles', 'FlightDelay', - ... 'FlightDelayMin', 'FlightDelayType', 'FlightNum', 'FlightTimeHour', - ... 'FlightTimeMin', 'Origin', 'OriginAirportID', 'OriginCityName', - ... 'OriginCountry', 'OriginLocation', 'OriginRegion', 'OriginWeather', - ... 'dayOfWeek', 'timestamp'], - ... dtype='object') + Index(['AvgTicketPrice', 'Cancelled', 'Carrier', 'Dest', 'DestAirportID', 'DestCityName', + ... 'DestCountry', 'DestLocation', 'DestRegion', 'DestWeather', 'DistanceKilometers', + ... 
'DistanceMiles', 'FlightDelay', 'FlightDelayMin', 'FlightDelayType', 'FlightNum', + ... 'FlightTimeHour', 'FlightTimeMin', 'Origin', 'OriginAirportID', 'OriginCityName', + ... 'OriginCountry', 'OriginLocation', 'OriginRegion', 'OriginWeather', 'dayOfWeek', + ... 'timestamp'], + ... dtype='object') """ return self._query_compiler.columns @@ -137,9 +146,20 @@ class DataFrame(NDFrame): def empty(self): """Determines if the DataFrame is empty. - Returns: - True if the DataFrame is empty. - False otherwise. + Returns + ------- + bool + If DataFrame is empty, return True, if not return False. + + See Also + -------- + :pandas_docs:`pandas.DataFrame.empty` + + Examples + -------- + >>> df = ed.DataFrame('localhost', 'flights') + >>> df.empty + False """ return len(self.columns) == 0 or len(self.index) == 0 @@ -161,6 +181,10 @@ class DataFrame(NDFrame): eland.DataFrame eland DataFrame filtered on first n rows sorted by index field + See Also + -------- + :pandas_docs:`pandas.DataFrame.head` + Examples -------- >>> df = ed.DataFrame('localhost', 'flights', columns=['Origin', 'Dest']) @@ -192,6 +216,10 @@ class DataFrame(NDFrame): eland.DataFrame: eland DataFrame filtered on last n rows sorted by index field + See Also + -------- + :pandas_docs:`pandas.DataFrame.tail` + Examples -------- >>> df = ed.DataFrame('localhost', 'flights', columns=['Origin', 'Dest']) @@ -257,20 +285,45 @@ class DataFrame(NDFrame): def count(self): """ - Count non-NA cells for each column (TODO row) + Count non-NA cells for each column. - Counts are based on exists queries against ES + Counts are based on exists queries against ES. This is inefficient, as it creates N queries (N is number of fields). - An alternative approach is to use value_count aggregations. However, they have issues in that: - 1. They can only be used with aggregatable fields (e.g. keyword not text) - 2. For list fields they return multiple counts. E.g. tags=['elastic', 'ml'] returns value_count=2 - for a single document. 
+ + - They can only be used with aggregatable fields (e.g. keyword not text) + - For list fields they return multiple counts. E.g. tags=['elastic', 'ml'] returns value_count=2 for a single document. + + TODO - add additional pandas.DataFrame.count features + + Returns + ------- + pandas.Series: + Summary of column counts + + See Also + -------- + :pandas_docs:`pandas.DataFrame.count` + + Examples + -------- + >>> df = ed.DataFrame('localhost', 'ecommerce', columns=['customer_first_name', 'geoip.city_name']) + >>> df.count() + customer_first_name 4675 + geoip.city_name 4094 + dtype: int64 """ return self._query_compiler.count() def info_es(self): + """ + + Returns + ------- + None + This method prints a debug summary of the task list Elasticsearch + """ buf = StringIO() super()._info_es(buf) @@ -297,9 +350,25 @@ class DataFrame(NDFrame): This method prints information about a DataFrame including the index dtype and column dtypes, non-null values and memory usage. + See :pandas_docs:`pandas.DataFrame.info` for full details. + + Notes + ----- This copies a lot of code from pandas.DataFrame.info as it is difficult to split out the appropriate code or creating a SparseDataFrame gives incorrect results on types and counts. 
+ + Examples + -------- + >>> df = ed.DataFrame('localhost', 'ecommerce', columns=['customer_first_name', 'geoip.city_name']) + >>> df.info() + + Index: 4675 entries, 0 to 4674 + Data columns (total 2 columns): + customer_first_name 4675 non-null object + geoip.city_name 4094 non-null object + dtypes: object(2) + memory usage: 96.0 bytes """ if buf is None: # pragma: no cover buf = sys.stdout @@ -386,7 +455,7 @@ class DataFrame(NDFrame): else: _verbose_repr() - counts = self.get_dtype_counts() + counts = self.dtypes.value_counts() dtypes = ['{k}({kk:d})'.format(k=k[0], kk=k[1]) for k in sorted(counts.items())] lines.append('dtypes: {types}'.format(types=', '.join(dtypes))) @@ -623,7 +692,11 @@ class DataFrame(NDFrame): ) def select_dtypes(self, include=None, exclude=None): - # get empty df + """ + Return a subset of the DataFrame's columns based on the column dtypes. + + Compatible with :pandas_docs:`pandas.DataFrame.select_dtypes` + """ empty_df = self._empty_pd_df() empty_df = empty_df.select_dtypes(include=include, exclude=exclude) @@ -649,19 +722,13 @@ class DataFrame(NDFrame): def keys(self): return self.columns - def groupby(self, by=None, axis=0, *args, **kwargs): - axis = pd.DataFrame._get_axis_number(axis) - - if axis == 1: - raise NotImplementedError("Aggregating via index not currently implemented - needs index transform") - def aggregate(self, func, axis=0, *args, **kwargs): """ Aggregate using one or more operations over the specified axis. Parameters ---------- - func : function, str, list or dict + func: function, str, list or dict Function to use for aggregating the data. If a function, must either work when passed a %(klass)s or when passed to %(klass)s.apply. @@ -671,11 +738,15 @@ class DataFrame(NDFrame): - string function name - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` - dict of axis labels -> functions, function names or list of such. 
+ + Currently, we only support ``['count', 'mad', 'max', 'mean', 'median', 'min', 'mode', 'quantile', + 'rank', 'sem', 'skew', 'sum', 'std', 'var']`` axis + Currently, we only support axis=0 (index) *args - Positional arguments to pass to `func`. + Positional arguments to pass to `func` **kwargs - Keyword arguments to pass to `func`. + Keyword arguments to pass to `func` Returns ------- @@ -684,6 +755,19 @@ class DataFrame(NDFrame): if DataFrame.agg is called with several functions, returns a DataFrame if Series.agg is called with single function, returns a scalar if Series.agg is called with several functions, returns a Series + + See Also + -------- + :pandas_docs:`pandas.DataFrame.aggregate` + + Examples + -------- + >>> df = ed.DataFrame('localhost', 'flights') + >>> df[['DistanceKilometers', 'AvgTicketPrice']].aggregate(['sum', 'min', 'std']) + DistanceKilometers AvgTicketPrice + sum 9.261629e+07 8.204365e+06 + min 0.000000e+00 1.000205e+02 + std 4.578263e+03 2.663867e+02 """ axis = pd.DataFrame._get_axis_number(axis) @@ -722,17 +806,39 @@ class DataFrame(NDFrame): raise NotImplementedError(expr, type(expr)) def get(self, key, default=None): - """Get item from object for given key (DataFrame column, Panel - slice, etc.). Returns default value if not found. + """ + Get item from object for given key (ex: DataFrame column). + Returns default value if not found. - Args: - key (DataFrame column, Panel slice) : the key for which value - to get + Parameters + ---------- + key: object - Returns: - value (type of items contained in object) : A value that is - stored at the key - """ + Returns + ------- + value: same type as items contained in object + + See Also + -------- + :pandas_docs:`pandas.DataFrame.get` + + Examples + -------- + >>> df = ed.DataFrame('localhost', 'flights') + >>> df.get('Carrier') + 0 Kibana Airlines + 1 Logstash Airways + 2 Logstash Airways + 3 Kibana Airlines + 4 Kibana Airlines + ... 
+ 13054 Logstash Airways + 13055 Logstash Airways + 13056 Logstash Airways + 13057 JetBeats + 13058 JetBeats + Name: Carrier, Length: 13059, dtype: object + """ if key in self.keys(): return self._getitem(key) else: diff --git a/eland/index.py b/eland/index.py index 7d4a355..dfd0846 100644 --- a/eland/index.py +++ b/eland/index.py @@ -1,27 +1,23 @@ -""" -class Index - -The index for an eland.DataFrame. - -Currently, the index is a field that exists in every document in an Elasticsearch index. -For slicing and sorting operations it must be a docvalues field. By default _id is used, -which can't be used for range queries and is inefficient for sorting: - -https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-id-field.html -(The value of the _id field is also accessible in aggregations or for sorting, -but doing so is discouraged as it requires to load a lot of data in memory. -In case sorting or aggregating on the _id field is required, it is advised to duplicate -the content of the _id field in another field that has doc_values enabled.) - -""" - - class Index: + """ + The index for an eland.DataFrame. + + TODO - This currently has very different behaviour than pandas.Index + + Currently, the index is a field that exists in every document in an Elasticsearch index. + For slicing and sorting operations it must be a docvalues field. By default _id is used, + which can't be used for range queries and is inefficient for sorting: + + https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-id-field.html + (The value of the _id field is also accessible in aggregations or for sorting, + but doing so is discouraged as it requires to load a lot of data in memory. + In case sorting or aggregating on the _id field is required, it is advised to duplicate + the content of the _id field in another field that has doc_values enabled.) 
+ """ ID_INDEX_FIELD = '_id' ID_SORT_FIELD = '_doc' # if index field is _id, sort by _doc def __init__(self, query_compiler, index_field=None): - # Calls setter self.index_field = index_field self._query_compiler = query_compiler diff --git a/eland/mappings.py b/eland/mappings.py index 2b61b6c..ec7072e 100644 --- a/eland/mappings.py +++ b/eland/mappings.py @@ -420,13 +420,13 @@ class Mappings: return self._mappings_capabilities[(self._mappings_capabilities._source == True) & ((self._mappings_capabilities.pd_dtype == 'int64') | (self._mappings_capabilities.pd_dtype == 'float64') | - (self._mappings_capabilities.pd_dtype == 'bool'))].loc[ - columns].index.tolist() + (self._mappings_capabilities.pd_dtype == 'bool'))].reindex( + columns).index.tolist() else: return self._mappings_capabilities[(self._mappings_capabilities._source == True) & ((self._mappings_capabilities.pd_dtype == 'int64') | - (self._mappings_capabilities.pd_dtype == 'float64'))].loc[ - columns].index.tolist() + (self._mappings_capabilities.pd_dtype == 'float64'))].reindex( + columns).index.tolist() else: if include_bool == True: return self._mappings_capabilities[(self._mappings_capabilities._source == True) & @@ -469,26 +469,6 @@ class Mappings: return pd.Series(self._source_field_pd_dtypes) - def get_dtype_counts(self, columns=None): - """ - Return counts of unique dtypes in this object. - - Returns - ------- - get_dtype_counts : Series - Series with the count of columns with each dtype. 
- """ - - if columns is not None: - return pd.Series(self._mappings_capabilities[self._mappings_capabilities._source == True] - .loc[columns] - .groupby('pd_dtype')['_source'] - .count().to_dict()) - - return pd.Series(self._mappings_capabilities[self._mappings_capabilities._source == True] - .groupby('pd_dtype')['_source'] - .count().to_dict()) - def info_es(self, buf): buf.write("Mappings:\n") buf.write("\tcapabilities: {0}\n".format(self._mappings_capabilities)) diff --git a/eland/ndframe.py b/eland/ndframe.py index 3c8f53b..605ce43 100644 --- a/eland/ndframe.py +++ b/eland/ndframe.py @@ -57,10 +57,23 @@ class NDFrame: def _get_index(self): """ + Return eland index referencing Elasticsearch field to index a DataFrame/Series Returns ------- + eland.Index: + Note eland.Index has a very limited API compared to pandas.Index + See Also + -------- + :pandas_docs:`pandas.DataFrame.index` + + Examples + -------- + >>> df = ed.DataFrame('localhost', 'flights') + >>> assert isinstance(df.index, ed.Index) + >>> df.index.index_field + '_id' """ return self._query_compiler.index @@ -68,10 +81,30 @@ class NDFrame: @property def dtypes(self): - return self._query_compiler.dtypes + """ + Return the pandas dtypes in the DataFrame. Elasticsearch types are mapped + to pandas dtypes via Mappings._es_dtype_to_pd_dtype.__doc__ - def get_dtype_counts(self): - return self._query_compiler.get_dtype_counts() + Returns + ------- + pandas.Series + The data type of each column. 
+ + See Also + -------- + :pandas_docs:`pandas.DataFrame.dtypes` + + Examples + -------- + >>> df = ed.DataFrame('localhost', 'flights', columns=['Origin', 'AvgTicketPrice', 'timestamp', 'dayOfWeek']) + >>> df.dtypes + Origin object + AvgTicketPrice float64 + timestamp datetime64[ns] + dayOfWeek int64 + dtype: object + """ + return self._query_compiler.dtypes def _build_repr_df(self, num_rows, num_cols): # Overriden version of BasePandasDataset._build_repr_df @@ -134,21 +167,71 @@ class NDFrame: errors="raise", ): """Return new object with labels in requested axis removed. - Args: - labels: Index or column labels to drop. - axis: Whether to drop labels from the index (0 / 'index') or - columns (1 / 'columns'). - index, columns: Alternative to specifying axis (labels, axis=1 is - equivalent to columns=labels). - level: For MultiIndex - inplace: If True, do operation inplace and return None. - errors: If 'ignore', suppress error and existing labels are - dropped. - Returns: - dropped : type of caller - (derived from modin.base.BasePandasDataset) + Parameters + ---------- + labels: + Index or column labels to drop. + axis: + Whether to drop labels from the index (0 / 'index') or columns (1 / 'columns'). + index, columns: + Alternative to specifying axis (labels, axis=1 is equivalent to columns=labels). + level: + For MultiIndex - not supported + inplace: + If True, do operation inplace and return None. + errors: + If 'ignore', suppress error and existing labels are dropped. + + Returns + ------- + dropped: + type of caller + + See Also + -------- + :pandas_docs:`pandas.DataFrame.drop` + + Examples + -------- + Drop a column + + >>> df = ed.DataFrame('localhost', 'ecommerce', columns=['customer_first_name', 'email', 'user']) + >>> df.drop(columns=['user']) + customer_first_name email + 0 Eddie eddie@underwood-family.zzz + 1 Mary mary@bailey-family.zzz + 2 Gwen gwen@butler-family.zzz + 3 Diane diane@chandler-family.zzz + 4 Eddie eddie@weber-family.zzz + ... ... ... 
+ 4670 Mary mary@lambert-family.zzz + 4671 Jim jim@gilbert-family.zzz + 4672 Yahya yahya@rivera-family.zzz + 4673 Mary mary@hampton-family.zzz + 4674 Jackson jackson@hopkins-family.zzz + + [4675 rows x 2 columns] + + Drop rows by index value (axis=0) + + >>> df.drop(['1', '2']) + customer_first_name email user + 0 Eddie eddie@underwood-family.zzz eddie + 3 Diane diane@chandler-family.zzz diane + 4 Eddie eddie@weber-family.zzz eddie + 5 Diane diane@goodwin-family.zzz diane + 6 Oliver oliver@rios-family.zzz oliver + ... ... ... ... + 4670 Mary mary@lambert-family.zzz mary + 4671 Jim jim@gilbert-family.zzz jim + 4672 Yahya yahya@rivera-family.zzz yahya + 4673 Mary mary@hampton-family.zzz mary + 4674 Jackson jackson@hopkins-family.zzz jackson + + [4673 rows x 3 columns] """ + #(derived from modin.base.BasePandasDataset) # Level not supported if level is not None: raise NotImplementedError("level not supported {}".format(level)) @@ -242,4 +325,36 @@ class NDFrame: return self._query_compiler._hist(num_bins) def describe(self): + """ + Generate descriptive statistics that summarize the central tendency, dispersion and shape of a + dataset’s distribution, excluding NaN values. + + Analyzes both numeric and object series, as well as DataFrame column sets of mixed data types. + The output will vary depending on what is provided. Refer to the notes below for more detail. + + TODO - add additional arguments (currently only numeric values are supported) + + Returns + ------- + pandas.DataFrame: + Summary information + + See Also + -------- + :pandas_docs:`pandas.DataFrame.describe` + + Examples + -------- + >>> df = ed.DataFrame('localhost', 'flights', columns=['AvgTicketPrice', 'FlightDelay']) + >>> df.describe() # ignoring percentiles as they don't generate consistent results + AvgTicketPrice FlightDelay + count 13059.000000 13059.000000 + mean 628.253689 0.251168 + std 266.386661 0.433685 + min 100.020531 0.000000 + ... + ... + ... 
+ max 1199.729004 1.000000 + """ return self._query_compiler.describe() diff --git a/eland/plotting.py b/eland/plotting.py index 09f9c7f..0b5e4c7 100644 --- a/eland/plotting.py +++ b/eland/plotting.py @@ -10,36 +10,42 @@ def ed_hist_frame(ed_df, column=None, by=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, sharey=False, figsize=None, layout=None, bins=10, **kwds): """ - Derived from pandas.plotting._core.hist_frame 0.24.2 - TODO update to 0.25.1 + See :pandas_docs:`pandas.DataFrame.hist` for usage. - Ideally, we'd call hist_frame directly with histogram data, + Notes + ----- + Derived from ``pandas.plotting._core.hist_frame 0.24.2`` - TODO update to ``0.25.1`` + + Ideally, we'd call `hist_frame` directly with histogram data, but weights are applied to ALL series. For example, we can plot a histogram of pre-binned data via: - counts, bins = np.histogram(data) - plt.hist(bins[:-1], bins, weights=counts) + .. code-block:: python + + counts, bins = np.histogram(data) + plt.hist(bins[:-1], bins, weights=counts) However, - ax.hist(data[col].dropna().values, bins=bins, **kwds) + .. code-block:: python - is for [col] and weights are a single array. + ax.hist(data[col].dropna().values, bins=bins, **kwds) - We therefore cut/paste code. + is for ``[col]`` and weights are a single array. + + Examples + -------- + .. 
plot:: + :context: close-figs + + >>> df = ed.DataFrame('localhost', 'flights') + >>> hist = df.select_dtypes(include=[np.number]).hist(figsize=[10,10]) """ # Start with empty pandas data frame derived from ed_df_bins, ed_df_weights = ed_df._hist(num_bins=bins) if by is not None: raise NotImplementedError("TODO") - """ - axes = grouped_hist(data, column=column, by=by, ax=ax, grid=grid, - figsize=figsize, sharex=sharex, sharey=sharey, - layout=layout, bins=bins, xlabelsize=xlabelsize, - xrot=xrot, ylabelsize=ylabelsize, - yrot=yrot, **kwds) - """ - return axes if column is not None: if not isinstance(column, (list, np.ndarray, ABCIndexClass)): diff --git a/eland/query_compiler.py b/eland/query_compiler.py index 7f78614..e2cc5a5 100644 --- a/eland/query_compiler.py +++ b/eland/query_compiler.py @@ -84,11 +84,6 @@ class ElandQueryCompiler: return self._mappings.dtypes(columns) - def get_dtype_counts(self): - columns = self._operations.get_columns() - - return self._mappings.get_dtype_counts(columns) - # END Index, columns, and dtypes objects def _es_results_to_pandas(self, results, batch_size=None): diff --git a/eland/series.py b/eland/series.py index 66f27e3..28b5d23 100644 --- a/eland/series.py +++ b/eland/series.py @@ -150,7 +150,7 @@ class Series(NDFrame): ) def _to_pandas(self): - return self._query_compiler._to_pandas()[self.name] + return self._query_compiler.to_pandas()[self.name] def __gt__(self, other): if isinstance(other, Series): diff --git a/eland/tests/dataframe/test_count_pytest.py b/eland/tests/dataframe/test_count_pytest.py index 72d09af..3dab08e 100644 --- a/eland/tests/dataframe/test_count_pytest.py +++ b/eland/tests/dataframe/test_count_pytest.py @@ -4,6 +4,7 @@ from pandas.util.testing import assert_series_equal from eland.tests.common import TestData +import pandas as pd class TestDataFrameCount(TestData): diff --git a/eland/tests/mappings/test_dtypes_pytest.py b/eland/tests/mappings/test_dtypes_pytest.py index 0987169..43d3e3e 100644 --- 
a/eland/tests/mappings/test_dtypes_pytest.py +++ b/eland/tests/mappings/test_dtypes_pytest.py @@ -24,22 +24,3 @@ class TestMappingsDtypes(TestData): ed_dtypes = ed_flights._query_compiler._mappings.dtypes(columns=['Carrier', 'AvgTicketPrice', 'Cancelled']) assert_series_equal(pd_dtypes, ed_dtypes) - - def test_flights_get_dtype_counts_all(self): - ed_flights = self.ed_flights() - pd_flights = self.pd_flights() - - pd_dtypes = pd_flights.get_dtype_counts().sort_index() - ed_dtypes = ed_flights._query_compiler._mappings.get_dtype_counts().sort_index() - - assert_series_equal(pd_dtypes, ed_dtypes) - - def test_flights_get_dtype_counts_columns(self): - ed_flights = self.ed_flights() - pd_flights = self.pd_flights()[['Carrier', 'AvgTicketPrice', 'Cancelled']] - - pd_dtypes = pd_flights.get_dtype_counts().sort_index() - ed_dtypes = ed_flights._query_compiler._mappings. \ - get_dtype_counts(columns=['Carrier', 'AvgTicketPrice', 'Cancelled']).sort_index() - - assert_series_equal(pd_dtypes, ed_dtypes) diff --git a/eland/utils.py b/eland/utils.py index 1299f6c..e55e348 100644 --- a/eland/utils.py +++ b/eland/utils.py @@ -141,3 +141,37 @@ def ed_to_pd(ed_df): eland.pd_to_ed: Create an eland.Dataframe from pandas.DataFrame """ return ed_df._to_pandas() + +def _inherit_docstrings(parent, excluded=[]): + """Creates a decorator which overwrites a decorated class' __doc__ + attribute with parent's __doc__ attribute. Also overwrites __doc__ of + methods and properties defined in the class with the __doc__ of matching + methods and properties in parent. + + Args: + parent (object): Class from which the decorated class inherits __doc__. + excluded (list): List of parent objects from which the class does not + inherit docstrings. + + Returns: + function: decorator which replaces the decorated class' documentation + with parent's documentation. 
+ """ + + def decorator(cls): + if parent not in excluded: + cls.__doc__ = parent.__doc__ + for attr, obj in cls.__dict__.items(): + parent_obj = getattr(parent, attr, None) + if parent_obj in excluded or ( + not callable(parent_obj) and not isinstance(parent_obj, property) + ): + continue + if callable(obj): + obj.__doc__ = parent_obj.__doc__ + elif isinstance(obj, property) and obj.fget is not None: + p = property(obj.fget, obj.fset, obj.fdel, parent_obj.__doc__) + setattr(cls, attr, p) + return cls + + return decorator From 8b3c66a5c85a55eef2b49deb84bb916a096b3a87 Mon Sep 17 00:00:00 2001 From: Stephen Dodson Date: Thu, 14 Nov 2019 09:27:44 +0000 Subject: [PATCH 2/2] Changing LICENSE to Apache2 Changing LICENSE to Apache2 + cleaning up setup.py --- LICENSE | 359 ++++++++++++++++++++----------------------- MANIFEST.in | 2 +- eland/__version__.py | 6 + setup.py | 50 ++++-- 4 files changed, 208 insertions(+), 209 deletions(-) create mode 100644 eland/__version__.py diff --git a/LICENSE b/LICENSE index 7376ffc..4c5e34c 100644 --- a/LICENSE +++ b/LICENSE @@ -1,223 +1,200 @@ -ELASTIC LICENSE AGREEMENT -PLEASE READ CAREFULLY THIS ELASTIC LICENSE AGREEMENT (THIS "AGREEMENT"), WHICH -CONSTITUTES A LEGALLY BINDING AGREEMENT AND GOVERNS ALL OF YOUR USE OF ALL OF -THE ELASTIC SOFTWARE WITH WHICH THIS AGREEMENT IS INCLUDED ("ELASTIC SOFTWARE") -THAT IS PROVIDED IN OBJECT CODE FORMAT, AND, IN ACCORDANCE WITH SECTION 2 BELOW, -CERTAIN OF THE ELASTIC SOFTWARE THAT IS PROVIDED IN SOURCE CODE FORMAT. BY -INSTALLING OR USING ANY OF THE ELASTIC SOFTWARE GOVERNED BY THIS AGREEMENT, YOU -ARE ASSENTING TO THE TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE -WITH SUCH TERMS AND CONDITIONS, YOU MAY NOT INSTALL OR USE THE ELASTIC SOFTWARE -GOVERNED BY THIS AGREEMENT. 
IF YOU ARE INSTALLING OR USING THE SOFTWARE ON -BEHALF OF A LEGAL ENTITY, YOU REPRESENT AND WARRANT THAT YOU HAVE THE ACTUAL -AUTHORITY TO AGREE TO THE TERMS AND CONDITIONS OF THIS AGREEMENT ON BEHALF OF -SUCH ENTITY. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ -Posted Date: April 20, 2018 + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION -This Agreement is entered into by and between Elasticsearch BV ("Elastic") and -You, or the legal entity on behalf of whom You are acting (as applicable, -"You"). + 1. Definitions. -1. OBJECT CODE END USER LICENSES, RESTRICTIONS AND THIRD PARTY OPEN SOURCE -SOFTWARE + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. - 1.1 Object Code End User License. Subject to the terms and conditions of - Section 1.2 of this Agreement, Elastic hereby grants to You, AT NO CHARGE and - for so long as you are not in breach of any provision of this Agreement, a - License to the Basic Features and Functions of the Elastic Software. + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. - 1.2 Reservation of Rights; Restrictions. As between Elastic and You, Elastic - and its licensors own all right, title and interest in and to the Elastic - Software, and except as expressly set forth in Sections 1.1, and 2.1 of this - Agreement, no other license to the Elastic Software is granted to You under - this Agreement, by implication, estoppel or otherwise. 
You agree not to: (i) - reverse engineer or decompile, decrypt, disassemble or otherwise reduce any - Elastic Software provided to You in Object Code, or any portion thereof, to - Source Code, except and only to the extent any such restriction is prohibited - by applicable law, (ii) except as expressly permitted in this Agreement, - prepare derivative works from, modify, copy or use the Elastic Software Object - Code or the Commercial Software Source Code in any manner; (iii) except as - expressly permitted in Section 1.1 above, transfer, sell, rent, lease, - distribute, sublicense, loan or otherwise transfer, Elastic Software Object - Code, in whole or in part, to any third party; (iv) use Elastic Software - Object Code for providing time-sharing services, any software-as-a-service, - service bureau services or as part of an application services provider or - other service offering (collectively, "SaaS Offering") where obtaining access - to the Elastic Software or the features and functions of the Elastic Software - is a primary reason or substantial motivation for users of the SaaS Offering - to access and/or use the SaaS Offering ("Prohibited SaaS Offering"); (v) - circumvent the limitations on use of Elastic Software provided to You in - Object Code format that are imposed or preserved by any License Key, or (vi) - alter or remove any Marks and Notices in the Elastic Software. If You have any - question as to whether a specific SaaS Offering constitutes a Prohibited SaaS - Offering, or are interested in obtaining Elastic's permission to engage in - commercial or non-commercial distribution of the Elastic Software, please - contact elastic_license@elastic.co. + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. - 1.3 Third Party Open Source Software. The Commercial Software may contain or - be provided with third party open source libraries, components, utilities and - other open source software (collectively, "Open Source Software"), which Open - Source Software may have applicable license terms as identified on a website - designated by Elastic. Notwithstanding anything to the contrary herein, use of - the Open Source Software shall be subject to the license terms and conditions - applicable to such Open Source Software, to the extent required by the - applicable licensor (which terms shall not restrict the license rights granted - to You hereunder, but may contain additional rights). To the extent any - condition of this Agreement conflicts with any license to the Open Source - Software, the Open Source Software license will govern with respect to such - Open Source Software only. Elastic may also separately provide you with - certain open source software that is licensed by Elastic. Your use of such - Elastic open source software will not be governed by this Agreement, but by - the applicable open source license terms. + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. -2. COMMERCIAL SOFTWARE SOURCE CODE + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. - 2.1 Limited License. 
Subject to the terms and conditions of Section 2.2 of - this Agreement, Elastic hereby grants to You, AT NO CHARGE and for so long as - you are not in breach of any provision of this Agreement, a limited, - non-exclusive, non-transferable, fully paid up royalty free right and license - to the Commercial Software in Source Code format, without the right to grant - or authorize sublicenses, to prepare Derivative Works of the Commercial - Software, provided You (i) do not hack the licensing mechanism, or otherwise - circumvent the intended limitations on the use of Elastic Software to enable - features other than Basic Features and Functions or those features You are - entitled to as part of a Subscription, and (ii) use the resulting object code - only for reasonable testing purposes. + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. - 2.2 Restrictions. Nothing in Section 2.1 grants You the right to (i) use the - Commercial Software Source Code other than in accordance with Section 2.1 - above, (ii) use a Derivative Work of the Commercial Software outside of a - Non-production Environment, in any production capacity, on a temporary or - permanent basis, or (iii) transfer, sell, rent, lease, distribute, sublicense, - loan or otherwise make available the Commercial Software Source Code, in whole - or in part, to any third party. Notwithstanding the foregoing, You may - maintain a copy of the repository in which the Source Code of the Commercial - Software resides and that copy may be publicly accessible, provided that you - include this Agreement with Your copy of the repository. 
+ "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). -3. TERMINATION + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. - 3.1 Termination. This Agreement will automatically terminate, whether or not - You receive notice of such Termination from Elastic, if You breach any of its - provisions. + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." - 3.2 Post Termination. 
Upon any termination of this Agreement, for any reason, - You shall promptly cease the use of the Elastic Software in Object Code format - and cease use of the Commercial Software in Source Code format. For the - avoidance of doubt, termination of this Agreement will not affect Your right - to use Elastic Software, in either Object Code or Source Code formats, made - available under the Apache License Version 2.0. + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. - 3.3 Survival. Sections 1.2, 2.2. 3.3, 4 and 5 shall survive any termination or - expiration of this Agreement. + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. -4. DISCLAIMER OF WARRANTIES AND LIMITATION OF LIABILITY + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. - 4.1 Disclaimer of Warranties. TO THE MAXIMUM EXTENT PERMITTED UNDER APPLICABLE - LAW, THE ELASTIC SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, - AND ELASTIC AND ITS LICENSORS MAKE NO WARRANTIES WHETHER EXPRESSED, IMPLIED OR - STATUTORY REGARDING OR RELATING TO THE ELASTIC SOFTWARE. TO THE MAXIMUM EXTENT - PERMITTED UNDER APPLICABLE LAW, ELASTIC AND ITS LICENSORS SPECIFICALLY - DISCLAIM ALL IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR - PURPOSE AND NON-INFRINGEMENT WITH RESPECT TO THE ELASTIC SOFTWARE, AND WITH - RESPECT TO THE USE OF THE FOREGOING. FURTHER, ELASTIC DOES NOT WARRANT RESULTS - OF USE OR THAT THE ELASTIC SOFTWARE WILL BE ERROR FREE OR THAT THE USE OF THE - ELASTIC SOFTWARE WILL BE UNINTERRUPTED. + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: - 4.2 Limitation of Liability. 
IN NO EVENT SHALL ELASTIC OR ITS LICENSORS BE - LIABLE TO YOU OR ANY THIRD PARTY FOR ANY DIRECT OR INDIRECT DAMAGES, - INCLUDING, WITHOUT LIMITATION, FOR ANY LOSS OF PROFITS, LOSS OF USE, BUSINESS - INTERRUPTION, LOSS OF DATA, COST OF SUBSTITUTE GOODS OR SERVICES, OR FOR ANY - SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, IN CONNECTION WITH - OR ARISING OUT OF THE USE OR INABILITY TO USE THE ELASTIC SOFTWARE, OR THE - PERFORMANCE OF OR FAILURE TO PERFORM THIS AGREEMENT, WHETHER ALLEGED AS A - BREACH OF CONTRACT OR TORTIOUS CONDUCT, INCLUDING NEGLIGENCE, EVEN IF ELASTIC - HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and -5. MISCELLANEOUS + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and - This Agreement completely and exclusively states the entire agreement of the - parties regarding the subject matter herein, and it supersedes, and its terms - govern, all prior proposals, agreements, or other communications between the - parties, oral or written, regarding such subject matter. This Agreement may be - modified by Elastic from time to time, and any such modifications will be - effective upon the "Posted Date" set forth at the top of the modified - Agreement. If any provision hereof is held unenforceable, this Agreement will - continue without said provision and be interpreted to reflect the original - intent of the parties. This Agreement and any non-contractual obligation - arising out of or in connection with it, is governed exclusively by Dutch law. - This Agreement shall not be governed by the 1980 UN Convention on Contracts - for the International Sale of Goods. 
All disputes arising out of or in - connection with this Agreement, including its existence and validity, shall be - resolved by the courts with jurisdiction in Amsterdam, The Netherlands, except - where mandatory law provides for the courts at another location in The - Netherlands to have jurisdiction. The parties hereby irrevocably waive any and - all claims and defenses either might otherwise have in any such action or - proceeding in any of such courts based upon any alleged lack of personal - jurisdiction, improper venue, forum non conveniens or any similar claim or - defense. A breach or threatened breach, by You of Section 2 may cause - irreparable harm for which damages at law may not provide adequate relief, and - therefore Elastic shall be entitled to seek injunctive relief without being - required to post a bond. You may not assign this Agreement (including by - operation of law in connection with a merger or acquisition), in whole or in - part to any third party without the prior written consent of Elastic, which - may be withheld or granted by Elastic in its sole and absolute discretion. - Any assignment in violation of the preceding sentence is void. Notices to - Elastic may also be sent to legal@elastic.co. + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and -6. 
DEFINITIONS + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. - The following terms have the meanings ascribed: + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. - 6.1 "Affiliate" means, with respect to a party, any entity that controls, is - controlled by, or which is under common control with, such party, where - "control" means ownership of at least fifty percent (50%) of the outstanding - voting shares of the entity, or the contractual right to establish policy for, - and manage the operations of, the entity. + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. - 6.2 "Basic Features and Functions" means those features and functions of the - Elastic Software that are eligible for use under a Basic license, as set forth - at https://www.elastic.co/subscriptions, as may be modified by Elastic from - time to time. + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. - 6.3 "Commercial Software" means the Elastic Software Source Code in any file - containing a header stating the contents are subject to the Elastic License or - which is contained in the repository folder labeled "x-pack", unless a LICENSE - file present in the directory subtree declares a different license. + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. 
- 6.4 "Derivative Work of the Commercial Software" means, for purposes of this - Agreement, any modification(s) or enhancement(s) to the Commercial Software, - which represent, as a whole, an original work of authorship. + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. - 6.5 "License" means a limited, non-exclusive, non-transferable, fully paid up, - royalty free, right and license, without the right to grant or authorize - sublicenses, solely for Your internal business operations to (i) install and - use the applicable Features and Functions of the Elastic Software in Object - Code, and (ii) permit Contractors and Your Affiliates to use the Elastic - software as set forth in (i) above, provided that such use by Contractors must - be solely for Your benefit and/or the benefit of Your Affiliates, and You - shall be responsible for all acts and omissions of such Contractors and - Affiliates in connection with their use of the Elastic software that are - contrary to the terms and conditions of this Agreement. + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. 
However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. - 6.6 "License Key" means a sequence of bytes, including but not limited to a - JSON blob, that is used to enable certain features and functions of the - Elastic Software. + APPENDIX: How to apply the Apache License to your work. - 6.7 "Marks and Notices" means all Elastic trademarks, trade names, logos and - notices present on the Documentation as originally provided by Elastic. + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. - 6.8 "Non-production Environment" means an environment for development, testing - or quality assurance, where software is not used for production purposes. + Copyright 2019 Elasticsearch BV - 6.9 "Object Code" means any form resulting from mechanical transformation or - translation of Source Code form, including but not limited to compiled object - code, generated documentation, and conversions to other media types. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at - 6.10 "Source Code" means the preferred form of computer software for making - modifications, including but not limited to software source code, - documentation source, and configuration files. + http://www.apache.org/licenses/LICENSE-2.0 - 6.11 "Subscription" means the right to receive Support Services and a License - to the Commercial Software. + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/MANIFEST.in b/MANIFEST.in index 9561fb1..1aba38f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1 @@ -include README.rst +include LICENSE diff --git a/eland/__version__.py b/eland/__version__.py new file mode 100644 index 0000000..b3205f8 --- /dev/null +++ b/eland/__version__.py @@ -0,0 +1,6 @@ +__title__ = 'eland' +__description__ = 'Python elasticsearch client to analyse, explore and manipulate data that resides in elasticsearch.' +__url__ = 'https://github.com/elastic/app-search-python' +__version__ = '0.1' +__maintainer__ = 'Elasticsearch B.V.' 
+__maintainer_email__ = 'steve.dodson@elastic.co' diff --git a/setup.py b/setup.py index 244e82a..7678cc7 100644 --- a/setup.py +++ b/setup.py @@ -1,21 +1,37 @@ -from setuptools import setup +from setuptools import setup, find_packages +from codecs import open +from os import path +here = path.abspath(path.dirname(__file__)) -def readme(): - with open('README.rst') as f: - return f.read() +with open(path.join(here, 'README.md'), encoding='utf-8') as f: + long_description = f.read() +here = path.abspath(path.dirname(__file__)) +about = {} +with open(path.join(here, 'eland', '__version__.py'), 'r', 'utf-8') as f: + exec(f.read(), about) -setup(name='eland', - version='0.1', - description='Python elasticsearch client to analyse, explore and manipulate data that resides in elasticsearch', - url='http://github.com/elastic/eland', - author='Stephen Dodson', - author_email='sjd171@gmail.com', - license='ELASTIC LICENSE', - packages=['eland'], - install_requires=[ - 'elasticsearch>=7.0.5', - 'pandas==0.25.1' - ], - zip_safe=False) +setup( + name=about['__title__'], + version=about['__version__'], + description=about['__description__'], + long_description=long_description, + long_description_content_type='text/markdown', + url=about['__url__'], + maintainer=about['__maintainer__'], + maintainer_email=about['__maintainer_email__'], + license='Apache 2.0', + classifiers=[ + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python :: 3.7', + ], + keywords='elastic eland pandas python', + install_requires=[ + 'elasticsearch>=7.0.5', + 'pandas==0.25.1', + 'matplotlib' + ] +)