Add Series.isna() and Series.notna()

This commit is contained in:
Daniel Mesejo-León 2020-05-19 23:12:59 +02:00 committed by GitHub
parent 1378544933
commit 890cf6dc97
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 99 additions and 4 deletions

View File

@ -66,11 +66,11 @@ except ImportError:
extlinks = { extlinks = {
"pandas_api_docs": ( "pandas_api_docs": (
"https://pandas.pydata.org/pandas-docs/version/0.25.3/reference/api/%s.html", "https://pandas.pydata.org/pandas-docs/stable/reference/api/%s.html",
"", "",
), ),
"pandas_user_guide": ( "pandas_user_guide": (
"https://pandas.pydata.org/pandas-docs/version/0.25.3/user_guide/%s.html", "https://pandas.pydata.org/pandas-docs/stable/user_guide/%s.html",
"Pandas User Guide/", "Pandas User Guide/",
), ),
"es_api_docs": ( "es_api_docs": (

View File

@ -0,0 +1,6 @@
eland.Series.isna
==================
.. currentmodule:: eland
.. automethod:: Series.isna

View File

@ -0,0 +1,6 @@
eland.Series.notna
==================
.. currentmodule:: eland
.. automethod:: Series.notna

View File

@ -74,6 +74,8 @@ Reindexing / selection / label manipulation
:toctree: api/ :toctree: api/
Series.rename Series.rename
Series.isna
Series.notna
Plotting Plotting
~~~~~~~~ ~~~~~~~~

View File

@ -141,7 +141,7 @@ class Startswith(BooleanFilter):
class IsNull(BooleanFilter): class IsNull(BooleanFilter):
def __init__(self, field: str) -> None: def __init__(self, field: str) -> None:
super().__init__() super().__init__()
self._filter = {"missing": {"field": field}} self._filter = {"bool": {"must_not": {"exists": {"field": field}}}}
class NotNull(BooleanFilter): class NotNull(BooleanFilter):

View File

@ -39,6 +39,8 @@ from eland.filter import (
LessEqual, LessEqual,
ScriptFilter, ScriptFilter,
IsIn, IsIn,
IsNull,
NotNull,
) )
from eland.utils import deprecated_api from eland.utils import deprecated_api
@ -473,6 +475,41 @@ class Series(NDFrame):
else: else:
raise NotImplementedError(other, type(other)) raise NotImplementedError(other, type(other))
def isna(self):
    """
    Detect missing values.

    Builds a filter on the field named ``self.name`` that matches the
    entries for which the field has no value. The result can be used to
    index an eland DataFrame (``df[df["col"].isna()]``) and can be
    negated with ``~``.

    Returns
    -------
    eland.filter.IsNull
        Boolean filter that selects each element in the Series that
        is an NA value.

    See Also
    --------
    :pandas_api_docs:`pandas.Series.isna`
    """
    return IsNull(field=self.name)

# Alias kept for parity with the pandas spelling ``Series.isnull``.
isnull = isna
def notna(self):
    """
    Detect existing (non-missing) values.

    Builds a filter on the field named ``self.name`` that matches the
    entries for which the field has a value. The result can be used to
    index an eland DataFrame (``df[df["col"].notna()]``).

    Returns
    -------
    eland.filter.NotNull
        Boolean filter that selects each element in the Series that
        is not an NA value.

    See Also
    --------
    :pandas_api_docs:`pandas.Series.notna`
    """
    return NotNull(field=self.name)

# Alias kept for parity with the pandas spelling ``Series.notnull``.
notnull = notna
@property @property
def ndim(self): def ndim(self):
""" """

View File

@ -31,7 +31,7 @@ class TestOperators:
assert Like("a", "a*b").build() == {"wildcard": {"a": "a*b"}} assert Like("a", "a*b").build() == {"wildcard": {"a": "a*b"}}
assert Rlike("a", "a*b").build() == {"regexp": {"a": "a*b"}} assert Rlike("a", "a*b").build() == {"regexp": {"a": "a*b"}}
assert Startswith("a", "jj").build() == {"prefix": {"a": "jj"}} assert Startswith("a", "jj").build() == {"prefix": {"a": "jj"}}
assert IsNull("a").build() == {"missing": {"field": "a"}} assert IsNull("a").build() == {"bool": {"must_not": {"exists": {"field": "a"}}}}
assert NotNull("a").build() == {"exists": {"field": "a"}} assert NotNull("a").build() == {"exists": {"field": "a"}}
assert ScriptFilter( assert ScriptFilter(
'doc["num1"].value > params.param1', lang="painless", params={"param1": 5} 'doc["num1"].value > params.param1', lang="painless", params={"param1": 5}

View File

@ -0,0 +1,44 @@
# Licensed to Elasticsearch B.V under one or more agreements.
# Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
# See the LICENSE file in the project root for more information
from eland import eland_to_pandas
from eland.tests.common import TestData
from eland.tests.common import assert_pandas_eland_frame_equal
class TestSeriesNA(TestData):
    """Compare eland ``isna``/``notna`` row selection against pandas."""

    # Columns of the ecommerce data set exercised by the looped tests.
    columns = [
        "currency",
        "customer_full_name",
        "geoip.country_iso_code",
        "geoip.region_name",
    ]

    def test_not_isna(self):
        """Selecting with ``~isna()`` gives the same frame in eland and pandas."""
        eland_frame = self.ed_ecommerce()
        pandas_frame = eland_to_pandas(eland_frame)

        for name in self.columns:
            eland_selected = eland_frame[~eland_frame[name].isna()]
            pandas_selected = pandas_frame[~pandas_frame[name].isna()]
            assert_pandas_eland_frame_equal(pandas_selected, eland_selected)

    def test_isna(self):
        """Selecting with ``isna()`` gives the same frame in eland and pandas."""
        eland_frame = self.ed_ecommerce()
        pandas_frame = eland_to_pandas(eland_frame)

        name = "geoip.region_name"
        eland_selected = eland_frame[eland_frame[name].isna()]
        pandas_selected = pandas_frame[pandas_frame[name].isna()]
        assert_pandas_eland_frame_equal(pandas_selected, eland_selected)

    def test_notna(self):
        """Selecting with ``notna()`` gives the same frame in eland and pandas."""
        eland_frame = self.ed_ecommerce()
        pandas_frame = eland_to_pandas(eland_frame)

        for name in self.columns:
            eland_selected = eland_frame[eland_frame[name].notna()]
            pandas_selected = pandas_frame[pandas_frame[name].notna()]
            assert_pandas_eland_frame_equal(pandas_selected, eland_selected)