mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Add es_dtypes property to DataFrame and Series
This commit is contained in:
parent
b7c6c26606
commit
adafeed667
6
docs/source/reference/api/eland.DataFrame.es_dtypes.rst
Normal file
6
docs/source/reference/api/eland.DataFrame.es_dtypes.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.DataFrame.es_dtypes
|
||||||
|
=========================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. autoattribute:: DataFrame.es_dtypes
|
6
docs/source/reference/api/eland.Series.es_dtype.rst
Normal file
6
docs/source/reference/api/eland.Series.es_dtype.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.es_dtype
|
||||||
|
=====================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. autoattribute:: Series.es_dtype
|
6
docs/source/reference/api/eland.Series.es_dtypes.rst
Normal file
6
docs/source/reference/api/eland.Series.es_dtypes.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.es_dtypes
|
||||||
|
======================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. autoattribute:: Series.es_dtypes
|
15
docs/source/reference/api/eland.ml.MLModel.rst
Normal file
15
docs/source/reference/api/eland.ml.MLModel.rst
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
eland.ml.MLModel
|
||||||
|
================
|
||||||
|
|
||||||
|
.. currentmodule:: eland.ml
|
||||||
|
|
||||||
|
.. autoclass:: MLModel
|
||||||
|
|
||||||
|
|
||||||
|
..
|
||||||
|
HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
|
||||||
|
.. autosummary::
|
||||||
|
:toctree:
|
||||||
|
|
||||||
|
DataFrame.abs
|
||||||
|
DataFrame.add
|
@ -12,7 +12,7 @@ Constructor
|
|||||||
|
|
||||||
DataFrame
|
DataFrame
|
||||||
|
|
||||||
Attributes and underlying data
|
Attributes and Underlying Data
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
:toctree: api/
|
:toctree: api/
|
||||||
@ -27,7 +27,7 @@ Attributes and underlying data
|
|||||||
DataFrame.ndim
|
DataFrame.ndim
|
||||||
DataFrame.size
|
DataFrame.size
|
||||||
|
|
||||||
Indexing, iteration
|
Indexing, Iteration
|
||||||
~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
:toctree: api/
|
:toctree: api/
|
||||||
@ -39,7 +39,7 @@ Indexing, iteration
|
|||||||
DataFrame.query
|
DataFrame.query
|
||||||
DataFrame.sample
|
DataFrame.sample
|
||||||
|
|
||||||
Function application, GroupBy & window
|
Function Application, GroupBy & Window
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
:toctree: api/
|
:toctree: api/
|
||||||
@ -49,7 +49,7 @@ Function application, GroupBy & window
|
|||||||
|
|
||||||
.. _api.dataframe.stats:
|
.. _api.dataframe.stats:
|
||||||
|
|
||||||
Computations / descriptive stats
|
Computations / Descriptive Stats
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
:toctree: api/
|
:toctree: api/
|
||||||
@ -67,7 +67,7 @@ Computations / descriptive stats
|
|||||||
DataFrame.sum
|
DataFrame.sum
|
||||||
DataFrame.nunique
|
DataFrame.nunique
|
||||||
|
|
||||||
Reindexing / selection / label manipulation
|
Reindexing / Selection / Label Manipulation
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
:toctree: api/
|
:toctree: api/
|
||||||
@ -89,8 +89,9 @@ Elasticsearch Functions
|
|||||||
|
|
||||||
DataFrame.es_info
|
DataFrame.es_info
|
||||||
DataFrame.es_query
|
DataFrame.es_query
|
||||||
|
DataFrame.es_dtypes
|
||||||
|
|
||||||
Serialization / IO / conversion
|
Serialization / IO / Conversion
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
:toctree: api/
|
:toctree: api/
|
||||||
|
@ -18,8 +18,8 @@ The fastest way to get started with machine learning features is to
|
|||||||
|
|
||||||
See `Elasticsearch Machine Learning documentation <https://www.elastic.co/guide/en/machine-learning/current/setup.html>`_ for more details.
|
See `Elasticsearch Machine Learning documentation <https://www.elastic.co/guide/en/machine-learning/current/setup.html>`_ for more details.
|
||||||
|
|
||||||
ImportedMLModel
|
MLModel
|
||||||
~~~~~~~~~~~~~~~
|
~~~~~~~
|
||||||
.. currentmodule:: eland.ml
|
.. currentmodule:: eland.ml
|
||||||
|
|
||||||
Constructor
|
Constructor
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
.. _api.series:
|
.. _api.series:
|
||||||
|
|
||||||
=========
|
======
|
||||||
Series
|
Series
|
||||||
=========
|
======
|
||||||
.. currentmodule:: eland
|
.. currentmodule:: eland
|
||||||
|
|
||||||
Constructor
|
Constructor
|
||||||
@ -12,7 +12,7 @@ Constructor
|
|||||||
|
|
||||||
Series
|
Series
|
||||||
|
|
||||||
Attributes and underlying data
|
Attributes and Underlying Data
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
:toctree: api/
|
:toctree: api/
|
||||||
@ -26,7 +26,7 @@ Attributes and underlying data
|
|||||||
Series.ndim
|
Series.ndim
|
||||||
Series.size
|
Series.size
|
||||||
|
|
||||||
Indexing, iteration
|
Indexing, Iteration
|
||||||
~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
:toctree: api/
|
:toctree: api/
|
||||||
@ -35,7 +35,7 @@ Indexing, iteration
|
|||||||
Series.tail
|
Series.tail
|
||||||
Series.sample
|
Series.sample
|
||||||
|
|
||||||
Binary operator functions
|
Binary Operator Functions
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
:toctree: api/
|
:toctree: api/
|
||||||
@ -63,7 +63,7 @@ Binary operator functions
|
|||||||
Series.rmod
|
Series.rmod
|
||||||
Series.rpow
|
Series.rpow
|
||||||
|
|
||||||
Computations / descriptive stats
|
Computations / Descriptive Stats
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
:toctree: api/
|
:toctree: api/
|
||||||
@ -80,7 +80,7 @@ Computations / descriptive stats
|
|||||||
Series.nunique
|
Series.nunique
|
||||||
Series.value_counts
|
Series.value_counts
|
||||||
|
|
||||||
Reindexing / selection / label manipulation
|
Reindexing / Selection / Label Manipulation
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
:toctree: api/
|
:toctree: api/
|
||||||
@ -100,7 +100,7 @@ Plotting
|
|||||||
|
|
||||||
Series.hist
|
Series.hist
|
||||||
|
|
||||||
Serialization / IO / conversion
|
Serialization / IO / Conversion
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
:toctree: api/
|
:toctree: api/
|
||||||
@ -115,3 +115,5 @@ Elasticsearch Functions
|
|||||||
:toctree: api/
|
:toctree: api/
|
||||||
|
|
||||||
Series.es_info
|
Series.es_info
|
||||||
|
Series.es_dtype
|
||||||
|
Series.es_dtypes
|
||||||
|
@ -780,6 +780,20 @@ class FieldMappings:
|
|||||||
# Convert return from 'str' to 'np.dtype'
|
# Convert return from 'str' to 'np.dtype'
|
||||||
return pd_dtypes.apply(lambda x: np.dtype(x))
|
return pd_dtypes.apply(lambda x: np.dtype(x))
|
||||||
|
|
||||||
|
def es_dtypes(self):
|
||||||
|
"""
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
dtypes: pd.Series
|
||||||
|
Index: Display name
|
||||||
|
Values: es_dtype as a string
|
||||||
|
"""
|
||||||
|
es_dtypes = self._mappings_capabilities["es_dtype"]
|
||||||
|
|
||||||
|
# Set name of the returned series as None
|
||||||
|
es_dtypes.name = None
|
||||||
|
return es_dtypes
|
||||||
|
|
||||||
def es_info(self, buf):
|
def es_info(self, buf):
|
||||||
buf.write("Mappings:\n")
|
buf.write("Mappings:\n")
|
||||||
buf.write(f" capabilities:\n{self._mappings_capabilities.to_string()}\n")
|
buf.write(f" capabilities:\n{self._mappings_capabilities.to_string()}\n")
|
||||||
|
@ -132,6 +132,28 @@ class NDFrame(ABC):
|
|||||||
"""
|
"""
|
||||||
return self._query_compiler.dtypes
|
return self._query_compiler.dtypes
|
||||||
|
|
||||||
|
@property
|
||||||
|
def es_dtypes(self):
|
||||||
|
"""
|
||||||
|
Return the Elasticsearch dtypes in the index
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
pandas.Series
|
||||||
|
The Elasticsearch data type of each column.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> df = ed.DataFrame('localhost', 'flights', columns=['Origin', 'AvgTicketPrice', 'timestamp', 'dayOfWeek'])
|
||||||
|
>>> df.es_dtypes
|
||||||
|
Origin keyword
|
||||||
|
AvgTicketPrice float
|
||||||
|
timestamp date
|
||||||
|
dayOfWeek byte
|
||||||
|
dtype: object
|
||||||
|
"""
|
||||||
|
return self._query_compiler.es_dtypes
|
||||||
|
|
||||||
def _build_repr(self, num_rows) -> pd.DataFrame:
|
def _build_repr(self, num_rows) -> pd.DataFrame:
|
||||||
# self could be Series or DataFrame
|
# self could be Series or DataFrame
|
||||||
if len(self.index) <= num_rows:
|
if len(self.index) <= num_rows:
|
||||||
|
@ -123,6 +123,10 @@ class QueryCompiler:
|
|||||||
def dtypes(self):
|
def dtypes(self):
|
||||||
return self._mappings.dtypes()
|
return self._mappings.dtypes()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def es_dtypes(self):
|
||||||
|
return self._mappings.es_dtypes()
|
||||||
|
|
||||||
# END Index, columns, and dtypes objects
|
# END Index, columns, and dtypes objects
|
||||||
|
|
||||||
def _es_results_to_pandas(self, results, batch_size=None, show_progress=False):
|
def _es_results_to_pandas(self, results, batch_size=None, show_progress=False):
|
||||||
|
@ -435,6 +435,13 @@ class Series(NDFrame):
|
|||||||
"""
|
"""
|
||||||
return self._query_compiler.dtypes[0]
|
return self._query_compiler.dtypes[0]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def es_dtype(self) -> str:
|
||||||
|
"""
|
||||||
|
Return the Elasticsearch type of the underlying data.
|
||||||
|
"""
|
||||||
|
return self._query_compiler.es_dtypes[0]
|
||||||
|
|
||||||
def __gt__(self, other: Union[int, float, "Series"]) -> BooleanFilter:
|
def __gt__(self, other: Union[int, float, "Series"]) -> BooleanFilter:
|
||||||
if isinstance(other, Series):
|
if isinstance(other, Series):
|
||||||
# Need to use scripted query to compare to values
|
# Need to use scripted query to compare to values
|
||||||
|
@ -29,6 +29,7 @@ from .common import (
|
|||||||
_pd_ecommerce,
|
_pd_ecommerce,
|
||||||
_ed_flights_small,
|
_ed_flights_small,
|
||||||
_pd_flights_small,
|
_pd_flights_small,
|
||||||
|
TestData,
|
||||||
)
|
)
|
||||||
import eland as ed
|
import eland as ed
|
||||||
|
|
||||||
@ -146,3 +147,8 @@ def df():
|
|||||||
return SymmetricAPIChecker(
|
return SymmetricAPIChecker(
|
||||||
ed_obj=_ed_flights_small, pd_obj=_pd_flights_small.copy()
|
ed_obj=_ed_flights_small, pd_obj=_pd_flights_small.copy()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def testdata():
|
||||||
|
return TestData()
|
||||||
|
@ -18,12 +18,12 @@
|
|||||||
# File called _pytest for PyCharm compatibility
|
# File called _pytest for PyCharm compatibility
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from eland.tests.common import assert_series_equal
|
||||||
|
|
||||||
|
|
||||||
class TestDataFrameDtypes:
|
class TestDataFrameDtypes:
|
||||||
def test_dtypes(self, df):
|
def test_dtypes(self, df):
|
||||||
print(df.dtypes)
|
|
||||||
|
|
||||||
for i in range(0, len(df.dtypes) - 1):
|
for i in range(0, len(df.dtypes) - 1):
|
||||||
assert isinstance(df.dtypes[i], type(df.dtypes[i]))
|
assert isinstance(df.dtypes[i], type(df.dtypes[i]))
|
||||||
|
|
||||||
@ -32,3 +32,40 @@ class TestDataFrameDtypes:
|
|||||||
df.select_dtypes(exclude=np.number)
|
df.select_dtypes(exclude=np.number)
|
||||||
df.select_dtypes(include=np.float64)
|
df.select_dtypes(include=np.float64)
|
||||||
df.select_dtypes(exclude=np.float64)
|
df.select_dtypes(exclude=np.float64)
|
||||||
|
|
||||||
|
def test_es_dtypes(self, testdata):
|
||||||
|
df = testdata.ed_flights_small()
|
||||||
|
assert_series_equal(
|
||||||
|
df.es_dtypes,
|
||||||
|
pd.Series(
|
||||||
|
{
|
||||||
|
"AvgTicketPrice": "float",
|
||||||
|
"Cancelled": "boolean",
|
||||||
|
"Carrier": "keyword",
|
||||||
|
"Dest": "keyword",
|
||||||
|
"DestAirportID": "keyword",
|
||||||
|
"DestCityName": "keyword",
|
||||||
|
"DestCountry": "keyword",
|
||||||
|
"DestLocation": "geo_point",
|
||||||
|
"DestRegion": "keyword",
|
||||||
|
"DestWeather": "keyword",
|
||||||
|
"DistanceKilometers": "float",
|
||||||
|
"DistanceMiles": "float",
|
||||||
|
"FlightDelay": "boolean",
|
||||||
|
"FlightDelayMin": "integer",
|
||||||
|
"FlightDelayType": "keyword",
|
||||||
|
"FlightNum": "keyword",
|
||||||
|
"FlightTimeHour": "float",
|
||||||
|
"FlightTimeMin": "float",
|
||||||
|
"Origin": "keyword",
|
||||||
|
"OriginAirportID": "keyword",
|
||||||
|
"OriginCityName": "keyword",
|
||||||
|
"OriginCountry": "keyword",
|
||||||
|
"OriginLocation": "geo_point",
|
||||||
|
"OriginRegion": "keyword",
|
||||||
|
"OriginWeather": "keyword",
|
||||||
|
"dayOfWeek": "byte",
|
||||||
|
"timestamp": "date",
|
||||||
|
}
|
||||||
|
),
|
||||||
|
)
|
||||||
|
@ -16,8 +16,10 @@
|
|||||||
# under the License.
|
# under the License.
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
import warnings
|
import warnings
|
||||||
from eland.common import build_pd_series, EMPTY_SERIES_DTYPE
|
from eland.common import build_pd_series, EMPTY_SERIES_DTYPE
|
||||||
|
from eland.tests.common import assert_series_equal
|
||||||
|
|
||||||
|
|
||||||
def test_empty_series_dtypes():
|
def test_empty_series_dtypes():
|
||||||
@ -33,3 +35,9 @@ def test_empty_series_dtypes():
|
|||||||
assert np.int32 != EMPTY_SERIES_DTYPE
|
assert np.int32 != EMPTY_SERIES_DTYPE
|
||||||
assert s.dtype == np.int32
|
assert s.dtype == np.int32
|
||||||
assert w == []
|
assert w == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_series_es_dtypes(testdata):
|
||||||
|
series = testdata.ed_flights_small().AvgTicketPrice
|
||||||
|
assert_series_equal(series.es_dtypes, pd.Series(data={"AvgTicketPrice": "float"}))
|
||||||
|
assert series.es_dtype == "float"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user