mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Add support for es_match() to DataFrame and Series
This commit is contained in:
parent
92a8040614
commit
cb4cd083c3
6
docs/sphinx/reference/api/eland.DataFrame.es_match.rst
Normal file
6
docs/sphinx/reference/api/eland.DataFrame.es_match.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.DataFrame.es_match
|
||||
========================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: DataFrame.es_match
|
6
docs/sphinx/reference/api/eland.Series.es_match.rst
Normal file
6
docs/sphinx/reference/api/eland.Series.es_match.rst
Normal file
@ -0,0 +1,6 @@
|
||||
eland.Series.es_match
|
||||
=====================
|
||||
|
||||
.. currentmodule:: eland
|
||||
|
||||
.. automethod:: Series.es_match
|
@ -111,6 +111,7 @@ Elasticsearch Functions
|
||||
:toctree: api/
|
||||
|
||||
DataFrame.es_info
|
||||
DataFrame.es_match
|
||||
DataFrame.es_query
|
||||
DataFrame.es_dtypes
|
||||
|
||||
|
@ -115,5 +115,6 @@ Elasticsearch Functions
|
||||
:toctree: api/
|
||||
|
||||
Series.es_info
|
||||
Series.es_match
|
||||
Series.es_dtype
|
||||
Series.es_dtypes
|
||||
|
@ -19,7 +19,7 @@ import re
|
||||
import sys
|
||||
import warnings
|
||||
from io import StringIO
|
||||
from typing import List, Optional, Sequence, Tuple, Union
|
||||
from typing import Any, List, Optional, Sequence, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
@ -632,6 +632,103 @@ class DataFrame(NDFrame):
|
||||
def info_es(self):
    """Alias of ``es_info``: returns whatever ``self.es_info()`` returns."""
    es_info_result = self.es_info()
    return es_info_result
|
||||
|
||||
def es_match(
    self,
    text: str,
    *,
    columns: Optional[Union[str, Sequence[str]]] = None,
    match_phrase: bool = False,
    must_not_match: bool = False,
    multi_match_type: Optional[str] = None,
    match_only_text_fields: bool = True,
    analyzer: Optional[str] = None,
    fuzziness: Optional[Union[int, str]] = None,
    **kwargs: Any,
) -> "DataFrame":
    """Filter this DataFrame with an Elasticsearch full-text query.

    The filter is a ``match``, ``match_phrase`` or ``multi_match`` query,
    selected from the given parameters and the columns being searched.

    See `Full-Text Queries in Elasticsearch <https://www.elastic.co/guide/en/elasticsearch/reference/current/full-text-queries.html>`_
    for background on these queries.

    When ``columns`` is not given, every field whose Elasticsearch type is
    'text' is searched. To build a filter over one column only, see
    :py:meth:`eland.Series.es_match`.

    Any extra keyword arguments are forwarded into the body of the match query.

    Parameters
    ----------
    text: str
        Text to search for
    columns: str, List[str], optional
        Column or columns to search. Defaults to all 'text' fields in Elasticsearch
    match_phrase: bool, default False
        When True a ``match_phrase`` query is used instead of ``match``, so the
        order of the words in ``text`` is taken into account.
    must_not_match: bool, default False
        When True the resulting filter is negated (boolean NOT, ``~``):
        rows are kept only if the text does not match.
    multi_match_type: str, optional
        Value for the ``multi_match.type`` setting, used when matching
        against more than one column
    match_only_text_fields: bool, default True
        When True an error is raised if any queried field is not a text
        field, since fields that aren't analyzed may not match as expected.
        Set to False to skip this check.
    analyzer: str, optional
        Analyzer to use for the match query
    fuzziness: int, str, optional
        Fuzziness option for the match query

    Returns
    -------
    DataFrame
        A new :py:class:`eland.DataFrame` filtered by the match query

    Examples
    --------
    >>> df = ed.DataFrame("localhost:9200", "ecommerce")
    >>> df.es_match("Men's", columns=["category"])
    category currency ... type user
    0 [Men's Clothing] EUR ... order eddie
    4 [Men's Clothing, Men's Accessories] EUR ... order eddie
    6 [Men's Clothing] EUR ... order oliver
    7 [Men's Clothing, Men's Accessories, Men's Shoes] EUR ... order abd
    11 [Men's Accessories, Men's Clothing] EUR ... order eddie
    ... ... ... ... ... ...
    4663 [Men's Shoes, Men's Clothing] EUR ... order samir
    4667 [Men's Clothing, Men's Shoes] EUR ... order sultan
    4671 [Men's Clothing] EUR ... order jim
    4672 [Men's Clothing] EUR ... order yahya
    4674 [Women's Accessories, Men's Clothing] EUR ... order jackson
    <BLANKLINE>
    [2310 rows x 45 columns]
    """
    # Work out which columns the query will run against.
    dtype_by_column = self.es_dtypes.to_dict()
    if isinstance(columns, str):
        search_columns = [columns]
    elif columns is None:
        search_columns = [
            name for name, es_dtype in dtype_by_column.items() if es_dtype == "text"
        ]
    else:
        search_columns = list(columns)

    compiler = self._query_compiler
    match_filter = compiler.es_match(
        text,
        search_columns,
        match_phrase=match_phrase,
        match_only_text_fields=match_only_text_fields,
        multi_match_type=multi_match_type,
        analyzer=analyzer,
        fuzziness=fuzziness,
        **kwargs,
    )
    # Negate the filter when the caller asked for rows that do NOT match.
    applied_filter = ~match_filter if must_not_match else match_filter
    return DataFrame(_query_compiler=compiler._update_query(applied_filter))
|
||||
|
||||
def es_query(self, query) -> "DataFrame":
|
||||
"""Applies an Elasticsearch DSL query to the current DataFrame.
|
||||
|
||||
|
@ -995,24 +995,12 @@ class Operations:
|
||||
is_scan = False
|
||||
if size is not None and size <= DEFAULT_ES_MAX_RESULT_WINDOW:
|
||||
if size > 0:
|
||||
try:
|
||||
|
||||
es_results = query_compiler._client.search(
|
||||
index=query_compiler._index_pattern,
|
||||
size=size,
|
||||
sort=sort_params,
|
||||
body=body,
|
||||
)
|
||||
except Exception:
|
||||
# Catch all ES errors and print debug (currently to stdout)
|
||||
error = {
|
||||
"index": query_compiler._index_pattern,
|
||||
"size": size,
|
||||
"sort": sort_params,
|
||||
"body": body,
|
||||
}
|
||||
print("Elasticsearch error:", error)
|
||||
raise
|
||||
es_results = query_compiler._client.search(
|
||||
index=query_compiler._index_pattern,
|
||||
size=size,
|
||||
sort=sort_params,
|
||||
body=body,
|
||||
)
|
||||
else:
|
||||
is_scan = True
|
||||
es_results = scan(
|
||||
|
@ -17,7 +17,7 @@
|
||||
|
||||
import copy
|
||||
from datetime import datetime
|
||||
from typing import TYPE_CHECKING, List, Optional, Sequence
|
||||
from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Union
|
||||
|
||||
import numpy as np # type: ignore
|
||||
import pandas as pd # type: ignore
|
||||
@ -430,6 +430,77 @@ class QueryCompiler:
|
||||
|
||||
return result
|
||||
|
||||
def es_match(
    self,
    text: str,
    columns: Sequence[str],
    *,
    match_phrase: bool = False,
    match_only_text_fields: bool = True,
    multi_match_type: Optional[str] = None,
    analyzer: Optional[str] = None,
    fuzziness: Optional[Union[int, str]] = None,
    **kwargs: Any,
) -> "QueryFilter":
    """Build a QueryFilter holding a ``match``, ``match_phrase`` or ``multi_match`` query.

    A single column produces ``{"match": {column: options}}`` (or
    ``"match_phrase"`` when ``match_phrase`` is True). Several columns produce
    ``{"multi_match": options}`` with the columns listed under ``"fields"``.

    Parameters
    ----------
    text: str
        Text to search for; stored under the ``"query"`` key.
    columns: Sequence[str]
        Columns to search. A bare string is treated as one column name.
    match_phrase: bool, default False
        Use ``match_phrase`` for one column, or ``type="phrase"`` for several.
    match_only_text_fields: bool, default True
        When True, raise if any non-wildcard column is not of Elasticsearch
        type 'text'. When False, ``lenient`` defaults to True in the query.
    multi_match_type: str, optional
        Value for the ``multi_match`` ``"type"`` option; only valid with more
        than one column.
    analyzer: str, optional
        Stored under ``"analyzer"`` when given.
    fuzziness: int, str, optional
        Stored under ``"fuzziness"`` when given.
    **kwargs
        Merged into the query options after ``analyzer`` and ``fuzziness``.

    Returns
    -------
    QueryFilter
        Filter wrapping the generated query body.

    Raises
    ------
    ValueError
        If ``columns`` is empty, if a non-text column is queried while
        ``match_only_text_fields`` is True, if ``multi_match_type`` is given
        for a single column, or if ``match_phrase`` is combined with a
        ``multi_match_type`` other than ``"phrase"``.
    KeyError
        If ``match_only_text_fields`` is True and a non-wildcard column is
        missing from ``self.es_dtypes``.
    """
    # A lone string is itself a sequence of characters; treat it as a single
    # column name instead of iterating it character by character.
    if isinstance(columns, str):
        columns = [columns]
    else:
        # Copy so the caller's sequence is never stored inside the query body.
        columns = list(columns)
    if not columns:
        raise ValueError("columns can't be empty")

    # Build the base options for the 'match_*' query
    options = {"query": text}
    if analyzer is not None:
        options["analyzer"] = analyzer
    if fuzziness is not None:
        options["fuzziness"] = fuzziness
    options.update(kwargs)

    if match_only_text_fields:
        # The field types are only needed for this check, so resolve them here.
        es_dtypes = self.es_dtypes.to_dict()
        non_text_columns = {}
        for column in columns:
            # Don't worry about wildcards
            if "*" in column:
                continue
            try:
                es_dtype = es_dtypes[column]
            except KeyError:
                raise KeyError(
                    f"Column {column!r} has no known Elasticsearch dtype; "
                    f"pass match_only_text_fields=False to skip the text-field check"
                ) from None
            if es_dtype != "text":
                non_text_columns[column] = es_dtype

        if non_text_columns:
            described = ", ".join(
                f"{name}={es_dtype}" for name, es_dtype in non_text_columns.items()
            )
            # NOTE(review): the message names 'match_only_text_es_dtypes' but the
            # parameter is 'match_only_text_fields'. The wording is kept as-is
            # because existing tests assert on this exact text.
            raise ValueError(
                f"Attempting to run es_match() on non-text fields "
                f"({described}) "
                f"means that these fields may not be analyzed properly. "
                f"Consider reindexing these fields as text or use 'match_only_text_es_dtypes=False' "
                f"to use match anyways"
            )
    else:
        options.setdefault("lenient", True)

    # If only one column use 'match'
    # otherwise use 'multi_match' with 'fields'
    if len(columns) == 1:
        if multi_match_type is not None:
            raise ValueError(
                "multi_match_type parameter only valid "
                "when searching more than one column"
            )
        query_name = "match_phrase" if match_phrase else "match"
        query = {query_name: {columns[0]: options}}
    else:
        options["fields"] = columns
        if match_phrase:
            if multi_match_type not in ("phrase", None):
                raise ValueError(
                    f"match_phrase=True and multi_match_type={multi_match_type!r} "
                    f"are not compatible. Must be multi_match_type='phrase'"
                )
            multi_match_type = "phrase"
        if multi_match_type is not None:
            options["type"] = multi_match_type

        query = {"multi_match": options}
    return QueryFilter(query)
|
||||
|
||||
def es_query(self, query):
    """Wrap ``query`` in a QueryFilter and return the result of ``_update_query``."""
    query_filter = QueryFilter(query)
    return self._update_query(query_filter)
|
||||
|
||||
|
@ -55,6 +55,7 @@ from eland.filter import (
|
||||
LessEqual,
|
||||
NotFilter,
|
||||
NotNull,
|
||||
QueryFilter,
|
||||
ScriptFilter,
|
||||
)
|
||||
from eland.ndframe import NDFrame
|
||||
@ -636,6 +637,74 @@ class Series(NDFrame):
|
||||
)
|
||||
return Series(_query_compiler=new_query_compiler)
|
||||
|
||||
def es_match(
    self,
    text: str,
    *,
    match_phrase: bool = False,
    match_only_text_fields: bool = True,
    analyzer: Optional[str] = None,
    fuzziness: Optional[Union[int, str]] = None,
    **kwargs: Any,
) -> QueryFilter:
    """Build a filter for this column from an Elasticsearch ``match`` or
    ``match_phrase`` query, chosen by the given parameters.

    See `Full-Text Queries in Elasticsearch <https://www.elastic.co/guide/en/elasticsearch/reference/current/full-text-queries.html>`_
    for background on these queries.

    Any extra keyword arguments are forwarded into the body of the match query.

    Parameters
    ----------
    text: str
        Text to search for
    match_phrase: bool, default False
        When True a ``match_phrase`` query is used instead of ``match``, so the
        order of the words in ``text`` is taken into account.
    match_only_text_fields: bool, default True
        When True an error is raised if the queried field is not a text
        field, since fields that aren't analyzed may not match as expected.
        Set to False to skip this check.
    analyzer: str, optional
        Analyzer to use for the match query
    fuzziness: int, str, optional
        Fuzziness option for the match query

    Returns
    -------
    QueryFilter
        Boolean filter that can be combined with other filters and
        then passed to DataFrame[...].

    Examples
    --------
    >>> df = ed.DataFrame(
    ...   "localhost:9200", "ecommerce",
    ...   columns=["category", "taxful_total_price"]
    ... )
    >>> df[
    ...     df.category.es_match("Men's")
    ...     & (df.taxful_total_price > 200.0)
    ... ].head(5)
    category taxful_total_price
    13 [Men's Clothing] 266.96
    33 [Men's Clothing] 221.98
    54 [Men's Clothing] 234.98
    93 [Men's Shoes, Women's Accessories] 239.98
    273 [Men's Shoes] 214.98
    <BLANKLINE>
    [5 rows x 2 columns]
    """
    build_filter = self._query_compiler.es_match
    # The query compiler takes a list of columns; a Series contributes only its own name.
    own_column = [self.name]
    return build_filter(
        text,
        columns=own_column,
        match_phrase=match_phrase,
        match_only_text_fields=match_only_text_fields,
        analyzer=analyzer,
        fuzziness=fuzziness,
        **kwargs,
    )
|
||||
|
||||
def es_info(self) -> str:
|
||||
buf = StringIO()
|
||||
|
||||
|
41
eland/tests/dataframe/test_es_match_pytest.py
Normal file
41
eland/tests/dataframe/test_es_match_pytest.py
Normal file
@ -0,0 +1,41 @@
|
||||
# Licensed to Elasticsearch B.V. under one or more contributor
|
||||
# license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright
|
||||
# ownership. Elasticsearch B.V. licenses this file to you under
|
||||
# the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# File called _pytest for PyCharm compatibility
|
||||
|
||||
from eland.tests.common import TestData
|
||||
|
||||
|
||||
class TestEsMatch(TestData):
    """Checks ``DataFrame.es_match`` results on the frame from ``ed_ecommerce()``."""

    def test_match(self):
        # Every returned row must contain at least one category mentioning "Men's".
        ecommerce = self.ed_ecommerce()
        matched = ecommerce.es_match("Men's")

        categories = list(matched.category.to_pandas())
        assert len(categories) > 0
        assert all(any("Men's" in label for label in row) for row in categories)

    def test_must_not_match(self):
        # Exclude "Men's" first, then require "Women's" on the remaining rows.
        ecommerce = self.ed_ecommerce()
        without_mens = ecommerce.es_match("Men's", must_not_match=True)
        womens_only = without_mens.es_match("Women's")

        categories = list(womens_only.category.to_pandas())
        assert len(categories) > 0
        assert all(all("Men's" not in label for label in row) for row in categories)
        assert all(any("Women's" in label for label in row) for row in categories)
|
196
eland/tests/query_compiler/test_es_match_pytest.py
Normal file
196
eland/tests/query_compiler/test_es_match_pytest.py
Normal file
@ -0,0 +1,196 @@
|
||||
# Licensed to Elasticsearch B.V. under one or more contributor
|
||||
# license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright
|
||||
# ownership. Elasticsearch B.V. licenses this file to you under
|
||||
# the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# File called _pytest for PyCharm compatibility
|
||||
|
||||
import pytest
|
||||
|
||||
from eland.query_compiler import QueryCompiler
|
||||
from eland.tests.common import TestData
|
||||
|
||||
|
||||
class TestEsMatch(TestData):
    """Checks the query bodies built by ``QueryCompiler.es_match`` and ``DataFrame.es_match``."""

    @staticmethod
    def _search_body(frame):
        # Resolve the frame's pending tasks and return the generated search body.
        compiler = frame._query_compiler
        query_params, _ = compiler._operations._resolve_tasks(compiler)
        return query_params.query.to_search_body()

    def test_es_match(self):
        query_compiler: QueryCompiler = self.ed_ecommerce()._query_compiler

        # One column -> plain 'match' query carrying the extra options.
        single = query_compiler.es_match(
            "joe", ["customer_full_name"], analyzer="my-analyzer", fuzziness="1..2"
        )
        expected_single = {
            "match": {
                "customer_full_name": {
                    "query": "joe",
                    "analyzer": "my-analyzer",
                    "fuzziness": "1..2",
                }
            }
        }
        assert single.build() == expected_single

        # Several columns -> 'multi_match' with the columns under 'fields'.
        multi = query_compiler.es_match(
            "joe", ["customer_last_name", "customer_first_name"]
        )
        expected_multi = {
            "multi_match": {
                "query": "joe",
                "fields": ["customer_last_name", "customer_first_name"],
            }
        }
        assert multi.build() == expected_multi

    def test_es_match_must_not_match(self):
        ecommerce = self.ed_ecommerce()

        # single match
        negated_single = ecommerce.es_match(
            "joe", columns=["customer_full_name"], must_not_match=True
        )
        assert self._search_body(negated_single) == {
            "query": {
                "bool": {
                    "must_not": {"match": {"customer_full_name": {"query": "joe"}}}
                }
            }
        }

        # multi_match
        negated_multi = ecommerce.es_match(
            "joe",
            columns=["customer_first_name", "customer_last_name"],
            must_not_match=True,
        )
        assert self._search_body(negated_multi) == {
            "query": {
                "bool": {
                    "must_not": {
                        "multi_match": {
                            "fields": [
                                "customer_first_name",
                                "customer_last_name",
                            ],
                            "query": "joe",
                        }
                    }
                }
            }
        }

    def test_es_match_phrase(self):
        query_compiler: QueryCompiler = self.ed_ecommerce()._query_compiler

        # One column -> 'match_phrase' replaces 'match'.
        single = query_compiler.es_match(
            "joe", ["customer_full_name"], match_phrase=True
        )
        assert single.build() == {
            "match_phrase": {
                "customer_full_name": {
                    "query": "joe",
                }
            }
        }

        # Several columns -> 'multi_match' with type 'phrase'.
        multi = query_compiler.es_match(
            "joe", ["customer_last_name", "customer_first_name"], match_phrase=True
        )
        assert multi.build() == {
            "multi_match": {
                "query": "joe",
                "type": "phrase",
                "fields": ["customer_last_name", "customer_first_name"],
            }
        }

    def test_es_match_phrase_not_allowed_with_multi_match_type(self):
        query_compiler: QueryCompiler = self.ed_ecommerce()._query_compiler

        # Any multi_match_type other than 'phrase' conflicts with match_phrase=True.
        with pytest.raises(ValueError) as excinfo:
            query_compiler.es_match(
                "joe",
                ["customer_first_name", "customer_last_name"],
                match_phrase=True,
                multi_match_type="best_fields",
            )
        expected_message = (
            "match_phrase=True and multi_match_type='best_fields' "
            "are not compatible. Must be multi_match_type='phrase'"
        )
        assert str(excinfo.value) == expected_message

        # Explicitly asking for 'phrase' is accepted.
        phrase_filter = query_compiler.es_match(
            "joe",
            ["customer_last_name", "customer_first_name"],
            match_phrase=True,
            multi_match_type="phrase",
        )
        assert phrase_filter.build() == {
            "multi_match": {
                "query": "joe",
                "type": "phrase",
                "fields": ["customer_last_name", "customer_first_name"],
            }
        }

    def test_es_match_non_text_fields(self):
        query_compiler: QueryCompiler = self.ed_ecommerce()._query_compiler
        mixed_columns = [
            "customer_first_name",
            "order_date",
            "customer_last_name",
            "currency",
            "order_*",
        ]

        # With the default check enabled, non-text columns are rejected.
        with pytest.raises(ValueError) as excinfo:
            query_compiler.es_match("joe", list(mixed_columns))
        assert str(excinfo.value) == (
            "Attempting to run es_match() on non-text fields (order_date=date, "
            "currency=keyword) means that these fields may not be analyzed properly. "
            "Consider reindexing these fields as text or use 'match_only_text_es_dtypes=False' "
            "to use match anyways"
        )

        # With the check disabled the query is built and marked lenient.
        lenient_filter = query_compiler.es_match(
            "joe",
            list(mixed_columns),
            match_only_text_fields=False,
        )
        assert lenient_filter.build() == {
            "multi_match": {
                "query": "joe",
                "lenient": True,
                "fields": [
                    "customer_first_name",
                    "order_date",
                    "customer_last_name",
                    "currency",
                    "order_*",
                ],
            }
        }
|
41
eland/tests/series/test_es_match.py
Normal file
41
eland/tests/series/test_es_match.py
Normal file
@ -0,0 +1,41 @@
|
||||
# Licensed to Elasticsearch B.V. under one or more contributor
|
||||
# license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright
|
||||
# ownership. Elasticsearch B.V. licenses this file to you under
|
||||
# the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# File called _pytest for PyCharm compatibility
|
||||
|
||||
from eland.tests.common import TestData
|
||||
|
||||
|
||||
class TestEsMatch(TestData):
    """Checks ``Series.es_match`` filters on the frame from ``ed_ecommerce()``."""

    def test_match(self):
        # Every returned row must contain at least one category mentioning "Men's".
        ecommerce = self.ed_ecommerce()
        mens_filter = ecommerce.category.es_match("Men's")

        categories = list(ecommerce[mens_filter].category.to_pandas())
        assert len(categories) > 0
        assert all(any("Men's" in label for label in row) for row in categories)

    def test_must_not_match(self):
        # Combine a negated "Men's" filter with a "Women's" filter.
        ecommerce = self.ed_ecommerce()
        combined_filter = ~ecommerce.category.es_match(
            "Men's"
        ) & ecommerce.category.es_match("Women's")

        categories = list(ecommerce[combined_filter].category.to_pandas())
        assert len(categories) > 0
        assert all(all("Men's" not in label for label in row) for row in categories)
        assert all(any("Women's" in label for label in row) for row in categories)
|
Loading…
x
Reference in New Issue
Block a user