mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Add es_dtypes property to DataFrame and Series
This commit is contained in:
parent
b7c6c26606
commit
adafeed667
6
docs/source/reference/api/eland.DataFrame.es_dtypes.rst
Normal file
6
docs/source/reference/api/eland.DataFrame.es_dtypes.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.DataFrame.es_dtypes
|
||||
=========================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. autoattribute:: DataFrame.es_dtypes
|
6
docs/source/reference/api/eland.Series.es_dtype.rst
Normal file
6
docs/source/reference/api/eland.Series.es_dtype.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.es_dtype
|
||||
=====================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. autoattribute:: Series.es_dtype
|
6
docs/source/reference/api/eland.Series.es_dtypes.rst
Normal file
6
docs/source/reference/api/eland.Series.es_dtypes.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.es_dtypes
|
||||
======================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. autoattribute:: Series.es_dtypes
|
15
docs/source/reference/api/eland.ml.MLModel.rst
Normal file
15
docs/source/reference/api/eland.ml.MLModel.rst
Normal file
@ -0,0 +1,15 @@
|
||||
eland.ml.MLModel
|
||||
================
|
||||
|
||||
.. currentmodule:: eland.ml
|
||||
|
||||
.. autoclass:: MLModel
|
||||
|
||||
|
||||
..
|
||||
HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
|
||||
.. autosummary::
|
||||
:toctree:
|
||||
|
||||
DataFrame.abs
|
||||
DataFrame.add
|
@ -12,7 +12,7 @@ Constructor
|
||||
|
||||
DataFrame
|
||||
|
||||
Attributes and underlying data
|
||||
Attributes and Underlying Data
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
@ -27,7 +27,7 @@ Attributes and underlying data
|
||||
DataFrame.ndim
|
||||
DataFrame.size
|
||||
|
||||
Indexing, iteration
|
||||
Indexing, Iteration
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
@ -39,7 +39,7 @@ Indexing, iteration
|
||||
DataFrame.query
|
||||
DataFrame.sample
|
||||
|
||||
Function application, GroupBy & window
|
||||
Function Application, GroupBy & Window
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
@ -49,7 +49,7 @@ Function application, GroupBy & window
|
||||
|
||||
.. _api.dataframe.stats:
|
||||
|
||||
Computations / descriptive stats
|
||||
Computations / Descriptive Stats
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
@ -67,7 +67,7 @@ Computations / descriptive stats
|
||||
DataFrame.sum
|
||||
DataFrame.nunique
|
||||
|
||||
Reindexing / selection / label manipulation
|
||||
Reindexing / Selection / Label Manipulation
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
@ -89,8 +89,9 @@ Elasticsearch Functions
|
||||
|
||||
DataFrame.es_info
|
||||
DataFrame.es_query
|
||||
DataFrame.es_dtypes
|
||||
|
||||
Serialization / IO / conversion
|
||||
Serialization / IO / Conversion
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
|
@ -18,8 +18,8 @@ The fastest way to get started with machine learning features is to
|
||||
|
||||
See `Elasticsearch Machine Learning documentation <https://www.elastic.co/guide/en/machine-learning/current/setup.html>`_ for more details.
|
||||
|
||||
ImportedMLModel
|
||||
~~~~~~~~~~~~~~~
|
||||
MLModel
|
||||
~~~~~~~
|
||||
.. currentmodule:: eland.ml
|
||||
|
||||
Constructor
|
||||
|
@ -1,8 +1,8 @@
|
||||
.. _api.series:
|
||||
|
||||
=========
|
||||
======
|
||||
Series
|
||||
=========
|
||||
======
|
||||
.. currentmodule:: eland
|
||||
|
||||
Constructor
|
||||
@ -12,7 +12,7 @@ Constructor
|
||||
|
||||
Series
|
||||
|
||||
Attributes and underlying data
|
||||
Attributes and Underlying Data
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
@ -26,7 +26,7 @@ Attributes and underlying data
|
||||
Series.ndim
|
||||
Series.size
|
||||
|
||||
Indexing, iteration
|
||||
Indexing, Iteration
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
@ -35,7 +35,7 @@ Indexing, iteration
|
||||
Series.tail
|
||||
Series.sample
|
||||
|
||||
Binary operator functions
|
||||
Binary Operator Functions
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
@ -63,7 +63,7 @@ Binary operator functions
|
||||
Series.rmod
|
||||
Series.rpow
|
||||
|
||||
Computations / descriptive stats
|
||||
Computations / Descriptive Stats
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
@ -80,7 +80,7 @@ Computations / descriptive stats
|
||||
Series.nunique
|
||||
Series.value_counts
|
||||
|
||||
Reindexing / selection / label manipulation
|
||||
Reindexing / Selection / Label Manipulation
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
@ -100,7 +100,7 @@ Plotting
|
||||
|
||||
Series.hist
|
||||
|
||||
Serialization / IO / conversion
|
||||
Serialization / IO / Conversion
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. autosummary::
|
||||
:toctree: api/
|
||||
@ -115,3 +115,5 @@ Elasticsearch Functions
|
||||
:toctree: api/
|
||||
|
||||
Series.es_info
|
||||
Series.es_dtype
|
||||
Series.es_dtypes
|
||||
|
@ -780,6 +780,20 @@ class FieldMappings:
|
||||
# Convert return from 'str' to 'np.dtype'
|
||||
return pd_dtypes.apply(lambda x: np.dtype(x))
|
||||
|
||||
def es_dtypes(self):
    """
    Return the Elasticsearch field type recorded for every mapped field.

    Returns
    -------
    dtypes: pd.Series
        Index: Display name
        Values: es_dtype as a string
    """
    # Work on a copy so that clearing ``name`` below cannot leak back into
    # ``_mappings_capabilities`` (pandas may hand out a cached column object).
    es_dtypes = self._mappings_capabilities["es_dtype"].copy()

    # Set name of the returned series as None
    es_dtypes.name = None
    return es_dtypes
|
||||
|
||||
def es_info(self, buf):
|
||||
buf.write("Mappings:\n")
|
||||
buf.write(f" capabilities:\n{self._mappings_capabilities.to_string()}\n")
|
||||
|
@ -132,6 +132,28 @@ class NDFrame(ABC):
|
||||
"""
|
||||
return self._query_compiler.dtypes
|
||||
|
||||
@property
def es_dtypes(self):
    """
    Elasticsearch field types of the columns, as reported by the query compiler.

    Returns
    -------
    pandas.Series
        One entry per column, holding that column's Elasticsearch data type.

    Examples
    --------
    >>> df = ed.DataFrame('localhost', 'flights', columns=['Origin', 'AvgTicketPrice', 'timestamp', 'dayOfWeek'])
    >>> df.es_dtypes
    Origin            keyword
    AvgTicketPrice      float
    timestamp            date
    dayOfWeek            byte
    dtype: object
    """
    compiler = self._query_compiler
    return compiler.es_dtypes
|
||||
|
||||
def _build_repr(self, num_rows) -> pd.DataFrame:
|
||||
# self could be Series or DataFrame
|
||||
if len(self.index) <= num_rows:
|
||||
|
@ -123,6 +123,10 @@ class QueryCompiler:
|
||||
def dtypes(self):
|
||||
return self._mappings.dtypes()
|
||||
|
||||
@property
def es_dtypes(self):
    """Elasticsearch field types, obtained by calling ``es_dtypes()`` on the field mappings."""
    mappings = self._mappings
    return mappings.es_dtypes()
|
||||
|
||||
# END Index, columns, and dtypes objects
|
||||
|
||||
def _es_results_to_pandas(self, results, batch_size=None, show_progress=False):
|
||||
|
@ -435,6 +435,13 @@ class Series(NDFrame):
|
||||
"""
|
||||
return self._query_compiler.dtypes[0]
|
||||
|
||||
@property
def es_dtype(self) -> str:
    """
    Return the Elasticsearch type of the underlying data.

    Returns
    -------
    str
        The first entry of the query compiler's ``es_dtypes`` (for a Series
        this is presumably the only entry).
    """
    # ``es_dtypes`` is indexed by field name, so take the first entry by
    # position explicitly; ``[0]`` on a label-indexed Series relies on the
    # deprecated integer-as-position fallback.
    return self._query_compiler.es_dtypes.iloc[0]
|
||||
|
||||
def __gt__(self, other: Union[int, float, "Series"]) -> BooleanFilter:
|
||||
if isinstance(other, Series):
|
||||
# Need to use scripted query to compare to values
|
||||
|
@ -29,6 +29,7 @@ from .common import (
|
||||
_pd_ecommerce,
|
||||
_ed_flights_small,
|
||||
_pd_flights_small,
|
||||
TestData,
|
||||
)
|
||||
import eland as ed
|
||||
|
||||
@ -146,3 +147,8 @@ def df():
|
||||
return SymmetricAPIChecker(
|
||||
ed_obj=_ed_flights_small, pd_obj=_pd_flights_small.copy()
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def testdata():
    """Session-scoped fixture that hands tests a ``TestData`` instance."""
    data = TestData()
    return data
|
||||
|
@ -18,12 +18,12 @@
|
||||
# File called _pytest for PyCharm compatibility
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from eland.tests.common import assert_series_equal
|
||||
|
||||
|
||||
class TestDataFrameDtypes:
|
||||
def test_dtypes(self, df):
|
||||
print(df.dtypes)
|
||||
|
||||
for i in range(0, len(df.dtypes) - 1):
|
||||
assert isinstance(df.dtypes[i], type(df.dtypes[i]))
|
||||
|
||||
@ -32,3 +32,40 @@ class TestDataFrameDtypes:
|
||||
df.select_dtypes(exclude=np.number)
|
||||
df.select_dtypes(include=np.float64)
|
||||
df.select_dtypes(exclude=np.float64)
|
||||
|
||||
def test_es_dtypes(self, testdata):
    """The small flights frame must report the expected Elasticsearch type for every field."""
    expected = pd.Series(
        {
            "AvgTicketPrice": "float",
            "Cancelled": "boolean",
            "Carrier": "keyword",
            "Dest": "keyword",
            "DestAirportID": "keyword",
            "DestCityName": "keyword",
            "DestCountry": "keyword",
            "DestLocation": "geo_point",
            "DestRegion": "keyword",
            "DestWeather": "keyword",
            "DistanceKilometers": "float",
            "DistanceMiles": "float",
            "FlightDelay": "boolean",
            "FlightDelayMin": "integer",
            "FlightDelayType": "keyword",
            "FlightNum": "keyword",
            "FlightTimeHour": "float",
            "FlightTimeMin": "float",
            "Origin": "keyword",
            "OriginAirportID": "keyword",
            "OriginCityName": "keyword",
            "OriginCountry": "keyword",
            "OriginLocation": "geo_point",
            "OriginRegion": "keyword",
            "OriginWeather": "keyword",
            "dayOfWeek": "byte",
            "timestamp": "date",
        }
    )
    actual = testdata.ed_flights_small().es_dtypes
    assert_series_equal(actual, expected)
|
||||
|
@ -16,8 +16,10 @@
|
||||
# under the License.
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import warnings
|
||||
from eland.common import build_pd_series, EMPTY_SERIES_DTYPE
|
||||
from eland.tests.common import assert_series_equal
|
||||
|
||||
|
||||
def test_empty_series_dtypes():
|
||||
@ -33,3 +35,9 @@ def test_empty_series_dtypes():
|
||||
assert np.int32 != EMPTY_SERIES_DTYPE
|
||||
assert s.dtype == np.int32
|
||||
assert w == []
|
||||
|
||||
|
||||
def test_series_es_dtypes(testdata):
    """``es_dtypes`` and ``es_dtype`` of the AvgTicketPrice Series both report "float"."""
    ticket_price = testdata.ed_flights_small().AvgTicketPrice
    expected = pd.Series(data={"AvgTicketPrice": "float"})
    assert_series_equal(ticket_price.es_dtypes, expected)
    assert ticket_price.es_dtype == "float"
|
||||
|
Loading…
x
Reference in New Issue
Block a user