Move tests directory outside of eland namespace

This commit is contained in:
P. Sai Vinay 2020-11-16 23:00:41 +05:30 committed by GitHub
parent 56f6ba6c8b
commit 473db4576b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
91 changed files with 321 additions and 297 deletions

6
.gitignore vendored
View File

@ -1,5 +1,6 @@
# Compiled python modules.
*.pyc
__pycache__/
# Setuptools distribution folder.
dist/
@ -11,18 +12,19 @@ build/
docs/build/
# pytest results
eland/tests/dataframe/results/
tests/dataframe/results/*csv
result_images/
# Python egg metadata, regenerated from source files by setuptools.
/*.egg-info
eland.egg-info/
# PyCharm files
.idea/
# vscode files
.vscode/*
.vscode/
# pytest files
.pytest_cache/

View File

@ -79,8 +79,8 @@ Once your changes and tests are ready to submit for review:
$ nox -s format
# Run the test suite
$ pytest --doctest-modules eland/tests/
$ pytest --nbval eland/tests/tests_notebook/
$ pytest --doctest-modules tests/
$ pytest --nbval tests/notebook/
```
@ -182,7 +182,7 @@ currently using a minimum version of PyCharm 2019.2.4.
* To setup test environment:
``` bash
> python -m eland.tests.setup_tests
> python -m tests.setup_tests
```
(Note this modifies Elasticsearch indices)

View File

@ -87,7 +87,7 @@ Once your changes and tests are ready to submit for review:
# Run the test suite
$ pytest --doctest-modules tests/
$ pytest --nbval eland/tests/tests_notebook/
$ pytest --nbval tests/notebook/
2. Sign the Contributor License Agreement
@ -184,13 +184,13 @@ Configuring PyCharm And Running Tests
- To setup test environment -*note this modifies Elasticsearch indices* run
.. code-block:: bash
python -m eland.tests.setup_tests
python -m tests.setup_tests
- To validate installation, open python console and run
.. code-block:: bash
import eland as ed
ed_df = ed.DataFrame('localhost', 'flights')
import eland as ed
ed_df = ed.DataFrame('localhost', 'flights')
- To run the automatic formatter and check for lint issues
.. code-block:: bash

View File

@ -22,13 +22,7 @@ from pathlib import Path
import nox
BASE_DIR = Path(__file__).parent
SOURCE_FILES = (
"setup.py",
"noxfile.py",
"eland/",
"docs/",
"utils/",
)
SOURCE_FILES = ("setup.py", "noxfile.py", "eland/", "docs/", "utils/", "tests/")
# Whenever type-hints are completed on a file it should
# be added here so that this file will continue to be checked
@ -98,15 +92,18 @@ def lint(session):
@nox.session(python=["3.6", "3.7", "3.8"])
def test(session):
session.install("-r", "requirements-dev.txt")
session.run("python", "-m", "eland.tests.setup_tests")
session.run("python", "-m", "tests.setup_tests")
session.install(".")
session.run(
"python",
"-m",
"pytest",
"--cov=eland",
"--cov-report",
"term-missing",
"--cov=eland/",
"--doctest-modules",
*(session.posargs or ("eland/",)),
"--nbval",
"eland/tests/tests_notebook/",
*(session.posargs or ("eland/", "tests/")),
)
session.run(
@ -119,7 +116,7 @@ def test(session):
"xgboost",
"lightgbm",
)
session.run("pytest", "eland/tests/ml/")
session.run("pytest", "tests/ml/")
@nox.session(reuse_venv=True)
@ -138,7 +135,7 @@ def docs(session):
es = elasticsearch.Elasticsearch("localhost:9200")
es.info()
if not es.indices.exists("flights"):
session.run("python", "-m", "eland.tests.setup_tests")
session.run("python", "-m", "tests.setup_tests")
es_active = True
except Exception:
es_active = False

View File

@ -25,7 +25,7 @@ import eland as ed
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
# Create pandas and eland data frames
from eland.tests import (
from tests import (
ECOMMERCE_DF_FILE_NAME,
ECOMMERCE_INDEX_NAME,
ES_TEST_CLIENT,

View File

@ -21,7 +21,7 @@ import numpy as np
import pytest
from pandas.testing import assert_frame_equal, assert_series_equal
from eland.tests.common import TestData
from tests.common import TestData
class TestDataFrameAggs(TestData):

View File

@ -18,7 +18,7 @@
# File called _pytest for PyCharm compatibility
import eland as ed
from eland.tests.common import ES_TEST_CLIENT, TestData
from tests.common import ES_TEST_CLIENT, TestData
class TestDataFrameBigMapping(TestData):

View File

@ -18,7 +18,7 @@
# File called _pytest for PyCharm compatibility
from pandas.testing import assert_series_equal
from eland.tests.common import TestData
from tests.common import TestData
class TestDataFrameCount(TestData):

View File

@ -24,7 +24,7 @@ from pandas.testing import assert_series_equal
import eland as ed
from eland.field_mappings import FieldMappings
from eland.tests.common import (
from tests.common import (
ES_TEST_CLIENT,
TestData,
assert_pandas_eland_frame_equal,

View File

@ -19,7 +19,7 @@
from pandas.testing import assert_frame_equal
from eland.tests.common import TestData
from tests.common import TestData
class TestDataFrameDescribe(TestData):

View File

@ -18,7 +18,7 @@
# File called _pytest for PyCharm compatibility
from eland.tests.common import TestData
from tests.common import TestData
class TestDataFrameDir(TestData):

View File

@ -20,7 +20,7 @@
import numpy as np
import pandas as pd
from eland.tests.common import assert_series_equal
from tests.common import assert_series_equal
class TestDataFrameDtypes:

View File

@ -17,7 +17,7 @@
# File called _pytest for PyCharm compatibility
from eland.tests.common import TestData
from tests.common import TestData
class TestEsMatch(TestData):

View File

@ -19,7 +19,7 @@
import pytest
from eland.tests.common import TestData, assert_eland_frame_equal
from tests.common import TestData, assert_eland_frame_equal
class TestDataEsQuery(TestData):

View File

@ -19,7 +19,7 @@
import pytest
from eland.tests.common import TestData
from tests.common import TestData
class TestDataFrameFilter(TestData):

View File

@ -1,196 +1,196 @@
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# File called _pytest for PyCharm compatibility
import pandas as pd
import pytest
from pandas.testing import assert_frame_equal, assert_index_equal, assert_series_equal
from eland.tests.common import TestData
class TestGroupbyDataFrame(TestData):
    """Check that groupby operations agree between the two flights frames.

    Every test builds one frame from ``self.pd_flights()`` (or its ``_small``
    variant) and one from ``self.ed_flights()``, applies the same groupby
    operation to both, and compares the outcomes.
    """

    # Aggregation names passed to ``groupby(...).agg(...)``.
    funcs = ["max", "min", "mean", "sum"]
    # Columns kept from the flights frames before grouping.
    filter_data = ["AvgTicketPrice", "Cancelled", "dayOfWeek"]

    @pytest.mark.parametrize("numeric_only", [True])
    def test_groupby_aggregate(self, numeric_only):
        """Run all of ``funcs`` in one ``agg`` call, grouped by ``Cancelled``."""
        # TODO Add tests for numeric_only=False for aggs
        # when we support aggregations on text fields
        pd_df = self.pd_flights().filter(self.filter_data)
        ed_df = self.ed_flights().filter(self.filter_data)

        agg_kwargs = {"numeric_only": numeric_only}
        expected = pd_df.groupby("Cancelled").agg(self.funcs, **agg_kwargs)
        actual = ed_df.groupby("Cancelled").agg(self.funcs, **agg_kwargs)

        # Values only; dtypes are covered by the aggs tests.
        assert_frame_equal(expected, actual, check_dtype=False, check_exact=False)

    @pytest.mark.parametrize("pd_agg", funcs)
    def test_groupby_aggregate_single_aggs(self, pd_agg):
        """Run each entry of ``funcs`` on its own through ``agg([name])``."""
        pd_df = self.pd_flights().filter(self.filter_data)
        ed_df = self.ed_flights().filter(self.filter_data)

        expected = pd_df.groupby("Cancelled").agg([pd_agg], numeric_only=True)
        actual = ed_df.groupby("Cancelled").agg([pd_agg], numeric_only=True)

        # Values only; dtypes are covered by the aggs tests.
        assert_frame_equal(expected, actual, check_dtype=False, check_exact=False)

    @pytest.mark.parametrize("pd_agg", ["max", "min", "mean", "sum", "median"])
    def test_groupby_aggs_numeric_only_true(self, pd_agg):
        """Call the named groupby method directly with ``numeric_only=True``."""
        # Pandas has numeric_only applicable for the above aggs with groupby only.
        pd_df = self.pd_flights().filter(self.filter_data)
        ed_df = self.ed_flights().filter(self.filter_data)

        pd_method = getattr(pd_df.groupby("Cancelled"), pd_agg)
        expected = pd_method(numeric_only=True)
        ed_method = getattr(ed_df.groupby("Cancelled"), pd_agg)
        actual = ed_method(numeric_only=True)

        # Values only; dtypes are covered by the aggs tests.
        # NOTE(review): rtol=2 tolerates a 200% relative difference - confirm
        # that such a loose bound is intended.
        assert_frame_equal(
            expected, actual, rtol=2, check_dtype=False, check_exact=False
        )

    @pytest.mark.parametrize("pd_agg", ["mad", "var", "std"])
    def test_groupby_aggs_mad_var_std(self, pd_agg):
        """Compare ``mad``/``var``/``std``; only the eland side gets ``numeric_only``."""
        # For these aggs pandas doesn't support numeric_only
        pd_df = self.pd_flights().filter(self.filter_data)
        ed_df = self.ed_flights().filter(self.filter_data)

        pd_method = getattr(pd_df.groupby("Cancelled"), pd_agg)
        expected = pd_method()
        ed_method = getattr(ed_df.groupby("Cancelled"), pd_agg)
        actual = ed_method(numeric_only=True)

        # Values only; dtypes are covered by the aggs tests.
        # NOTE(review): rtol=4 tolerates a 400% relative difference - confirm
        # that such a loose bound is intended.
        assert_frame_equal(
            expected, actual, rtol=4, check_dtype=False, check_exact=False
        )

    @pytest.mark.parametrize("pd_agg", ["nunique"])
    def test_groupby_aggs_nunique(self, pd_agg):
        """Compare ``nunique`` called without arguments on both groupbys."""
        pd_df = self.pd_flights().filter(self.filter_data)
        ed_df = self.ed_flights().filter(self.filter_data)

        pd_method = getattr(pd_df.groupby("Cancelled"), pd_agg)
        expected = pd_method()
        ed_method = getattr(ed_df.groupby("Cancelled"), pd_agg)
        actual = ed_method()

        # Values only; dtypes are covered by the aggs tests.
        # NOTE(review): rtol=4 tolerates a 400% relative difference - confirm
        # that such a loose bound is intended.
        assert_frame_equal(
            expected, actual, rtol=4, check_dtype=False, check_exact=False
        )

    @pytest.mark.parametrize("pd_agg", ["max", "min", "mean", "median"])
    def test_groupby_aggs_numeric_only_false(self, pd_agg):
        """Compare the ``timestamp`` column when non-numeric columns are kept."""
        pd_df = self.pd_flights().filter(self.filter_data + ["timestamp"])
        ed_df = self.ed_flights().filter(self.filter_data + ["timestamp"])

        # pandas numeric_only=False, matches with Eland numeric_only=None
        pd_method = getattr(pd_df.groupby("Cancelled"), pd_agg)
        expected = pd_method(numeric_only=False)
        ed_method = getattr(ed_df.groupby("Cancelled"), pd_agg)
        actual = ed_method(numeric_only=None)

        # sum usually returns NaT for Eland, Nothing is returned from pandas
        # Only the timestamp field is checked here; the remaining columns
        # behave as in the numeric_only=True tests.
        # assert_frame_equal doesn't work well for timestamp fields (it
        # converts them into int), so both sides are converted to float.
        expected_ts = pd.to_numeric(expected["timestamp"], downcast="float")
        actual_ts = pd.to_numeric(actual["timestamp"], downcast="float")
        # NOTE(review): rtol=4 tolerates a 400% relative difference - confirm
        # that such a loose bound is intended.
        assert_series_equal(expected_ts, actual_ts, rtol=4, check_exact=False)

    def test_groupby_columns(self):
        """Check the errors raised for a missing ``by`` and an unknown column."""
        ed_df = self.ed_flights().filter(self.filter_data)

        with pytest.raises(
            ValueError, match="by parameter should be specified to groupby"
        ):
            ed_df.groupby(None).mean()

        with pytest.raises(
            KeyError, match="Requested columns 'ABC' not in the DataFrame"
        ):
            ed_df.groupby(["ABC", "Cancelled"]).mean()

    @pytest.mark.parametrize(
        "by",
        ["timestamp", "dayOfWeek", "Carrier", "Cancelled", ["dayOfWeek", "Carrier"]],
    )
    def test_groupby_different_dtypes(self, by):
        """Group the small flights frames by keys of several dtypes."""
        columns = ["dayOfWeek", "Carrier", "timestamp", "Cancelled"]
        pd_df = self.pd_flights_small().filter(columns)
        ed_df = self.ed_flights_small().filter(columns)

        expected = pd_df.groupby(by).nunique()
        actual = ed_df.groupby(by).nunique()

        # Index labels, index dtype and column labels must all line up.
        assert list(expected.index) == list(actual.index)
        assert expected.index.dtype == actual.index.dtype
        assert list(expected.columns) == list(actual.columns)

    def test_groupby_dropna(self):
        """Placeholder that currently asserts nothing."""
        # TODO Add tests once dropna is implemented
        pass

    @pytest.mark.parametrize("groupby", ["dayOfWeek", ["dayOfWeek", "Cancelled"]])
    @pytest.mark.parametrize(
        ["func", "func_args"],
        [
            ("count", ()),
            ("agg", ("count",)),
            ("agg", (["count"],)),
            ("agg", (["max", "count", "min"],)),
        ],
    )
    def test_groupby_dataframe_count(self, groupby, func, func_args):
        """Compare ``count`` called directly and through ``agg``."""
        pd_df = self.pd_flights().filter(self.filter_data)
        ed_df = self.ed_flights().filter(self.filter_data)

        pd_method = getattr(pd_df.groupby(groupby), func)
        expected = pd_method(*func_args)
        ed_method = getattr(ed_df.groupby(groupby), func)
        actual = ed_method(*func_args)

        assert_index_equal(expected.columns, actual.columns)
        assert_index_equal(expected.index, actual.index)
        assert_frame_equal(expected, actual)
        assert_series_equal(expected.dtypes, actual.dtypes)

    def test_groupby_dataframe_mad(self):
        """Compare labels and dtypes of ``mad``, alone and combined with ``min``."""
        pd_df = self.pd_flights().filter(self.filter_data + ["DestCountry"])
        ed_df = self.ed_flights().filter(self.filter_data + ["DestCountry"])

        # ``mad`` called directly on the groupby object.
        expected_mad = pd_df.groupby("DestCountry").mad()
        actual_mad = ed_df.groupby("DestCountry").mad()

        assert_index_equal(expected_mad.columns, actual_mad.columns)
        assert_index_equal(expected_mad.index, actual_mad.index)
        assert_series_equal(expected_mad.dtypes, actual_mad.dtypes)

        # ``mad`` requested together with ``min`` through ``aggregate``.
        expected_both = pd_df.groupby("DestCountry").aggregate(["min", "mad"])
        actual_both = ed_df.groupby("DestCountry").aggregate(["min", "mad"])

        assert_index_equal(expected_both.columns, actual_both.columns)
        assert_index_equal(expected_both.index, actual_both.index)
        assert_series_equal(expected_both.dtypes, actual_both.dtypes)
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# File called _pytest for PyCharm compatibility
import pandas as pd
import pytest
from pandas.testing import assert_frame_equal, assert_index_equal, assert_series_equal
from tests.common import TestData
class TestGroupbyDataFrame(TestData):
    """Check that groupby operations agree between the two flights frames.

    Every test builds one frame from ``self.pd_flights()`` (or its ``_small``
    variant) and one from ``self.ed_flights()``, applies the same groupby
    operation to both, and compares the outcomes.
    """

    # Aggregation names passed to ``groupby(...).agg(...)``.
    funcs = ["max", "min", "mean", "sum"]
    # Columns kept from the flights frames before grouping.
    filter_data = ["AvgTicketPrice", "Cancelled", "dayOfWeek"]

    @pytest.mark.parametrize("numeric_only", [True])
    def test_groupby_aggregate(self, numeric_only):
        """Run all of ``funcs`` in one ``agg`` call, grouped by ``Cancelled``."""
        # TODO Add tests for numeric_only=False for aggs
        # when we support aggregations on text fields
        pd_df = self.pd_flights().filter(self.filter_data)
        ed_df = self.ed_flights().filter(self.filter_data)

        agg_kwargs = {"numeric_only": numeric_only}
        expected = pd_df.groupby("Cancelled").agg(self.funcs, **agg_kwargs)
        actual = ed_df.groupby("Cancelled").agg(self.funcs, **agg_kwargs)

        # Values only; dtypes are covered by the aggs tests.
        assert_frame_equal(expected, actual, check_dtype=False, check_exact=False)

    @pytest.mark.parametrize("pd_agg", funcs)
    def test_groupby_aggregate_single_aggs(self, pd_agg):
        """Run each entry of ``funcs`` on its own through ``agg([name])``."""
        pd_df = self.pd_flights().filter(self.filter_data)
        ed_df = self.ed_flights().filter(self.filter_data)

        expected = pd_df.groupby("Cancelled").agg([pd_agg], numeric_only=True)
        actual = ed_df.groupby("Cancelled").agg([pd_agg], numeric_only=True)

        # Values only; dtypes are covered by the aggs tests.
        assert_frame_equal(expected, actual, check_dtype=False, check_exact=False)

    @pytest.mark.parametrize("pd_agg", ["max", "min", "mean", "sum", "median"])
    def test_groupby_aggs_numeric_only_true(self, pd_agg):
        """Call the named groupby method directly with ``numeric_only=True``."""
        # Pandas has numeric_only applicable for the above aggs with groupby only.
        pd_df = self.pd_flights().filter(self.filter_data)
        ed_df = self.ed_flights().filter(self.filter_data)

        pd_method = getattr(pd_df.groupby("Cancelled"), pd_agg)
        expected = pd_method(numeric_only=True)
        ed_method = getattr(ed_df.groupby("Cancelled"), pd_agg)
        actual = ed_method(numeric_only=True)

        # Values only; dtypes are covered by the aggs tests.
        # NOTE(review): rtol=2 tolerates a 200% relative difference - confirm
        # that such a loose bound is intended.
        assert_frame_equal(
            expected, actual, rtol=2, check_dtype=False, check_exact=False
        )

    @pytest.mark.parametrize("pd_agg", ["mad", "var", "std"])
    def test_groupby_aggs_mad_var_std(self, pd_agg):
        """Compare ``mad``/``var``/``std``; only the eland side gets ``numeric_only``."""
        # For these aggs pandas doesn't support numeric_only
        pd_df = self.pd_flights().filter(self.filter_data)
        ed_df = self.ed_flights().filter(self.filter_data)

        pd_method = getattr(pd_df.groupby("Cancelled"), pd_agg)
        expected = pd_method()
        ed_method = getattr(ed_df.groupby("Cancelled"), pd_agg)
        actual = ed_method(numeric_only=True)

        # Values only; dtypes are covered by the aggs tests.
        # NOTE(review): rtol=4 tolerates a 400% relative difference - confirm
        # that such a loose bound is intended.
        assert_frame_equal(
            expected, actual, rtol=4, check_dtype=False, check_exact=False
        )

    @pytest.mark.parametrize("pd_agg", ["nunique"])
    def test_groupby_aggs_nunique(self, pd_agg):
        """Compare ``nunique`` called without arguments on both groupbys."""
        pd_df = self.pd_flights().filter(self.filter_data)
        ed_df = self.ed_flights().filter(self.filter_data)

        pd_method = getattr(pd_df.groupby("Cancelled"), pd_agg)
        expected = pd_method()
        ed_method = getattr(ed_df.groupby("Cancelled"), pd_agg)
        actual = ed_method()

        # Values only; dtypes are covered by the aggs tests.
        # NOTE(review): rtol=4 tolerates a 400% relative difference - confirm
        # that such a loose bound is intended.
        assert_frame_equal(
            expected, actual, rtol=4, check_dtype=False, check_exact=False
        )

    @pytest.mark.parametrize("pd_agg", ["max", "min", "mean", "median"])
    def test_groupby_aggs_numeric_only_false(self, pd_agg):
        """Compare the ``timestamp`` column when non-numeric columns are kept."""
        pd_df = self.pd_flights().filter(self.filter_data + ["timestamp"])
        ed_df = self.ed_flights().filter(self.filter_data + ["timestamp"])

        # pandas numeric_only=False, matches with Eland numeric_only=None
        pd_method = getattr(pd_df.groupby("Cancelled"), pd_agg)
        expected = pd_method(numeric_only=False)
        ed_method = getattr(ed_df.groupby("Cancelled"), pd_agg)
        actual = ed_method(numeric_only=None)

        # sum usually returns NaT for Eland, Nothing is returned from pandas
        # Only the timestamp field is checked here; the remaining columns
        # behave as in the numeric_only=True tests.
        # assert_frame_equal doesn't work well for timestamp fields (it
        # converts them into int), so both sides are converted to float.
        expected_ts = pd.to_numeric(expected["timestamp"], downcast="float")
        actual_ts = pd.to_numeric(actual["timestamp"], downcast="float")
        # NOTE(review): rtol=4 tolerates a 400% relative difference - confirm
        # that such a loose bound is intended.
        assert_series_equal(expected_ts, actual_ts, rtol=4, check_exact=False)

    def test_groupby_columns(self):
        """Check the errors raised for a missing ``by`` and an unknown column."""
        ed_df = self.ed_flights().filter(self.filter_data)

        with pytest.raises(
            ValueError, match="by parameter should be specified to groupby"
        ):
            ed_df.groupby(None).mean()

        with pytest.raises(
            KeyError, match="Requested columns 'ABC' not in the DataFrame"
        ):
            ed_df.groupby(["ABC", "Cancelled"]).mean()

    @pytest.mark.parametrize(
        "by",
        ["timestamp", "dayOfWeek", "Carrier", "Cancelled", ["dayOfWeek", "Carrier"]],
    )
    def test_groupby_different_dtypes(self, by):
        """Group the small flights frames by keys of several dtypes."""
        columns = ["dayOfWeek", "Carrier", "timestamp", "Cancelled"]
        pd_df = self.pd_flights_small().filter(columns)
        ed_df = self.ed_flights_small().filter(columns)

        expected = pd_df.groupby(by).nunique()
        actual = ed_df.groupby(by).nunique()

        # Index labels, index dtype and column labels must all line up.
        assert list(expected.index) == list(actual.index)
        assert expected.index.dtype == actual.index.dtype
        assert list(expected.columns) == list(actual.columns)

    def test_groupby_dropna(self):
        """Placeholder that currently asserts nothing."""
        # TODO Add tests once dropna is implemented
        pass

    @pytest.mark.parametrize("groupby", ["dayOfWeek", ["dayOfWeek", "Cancelled"]])
    @pytest.mark.parametrize(
        ["func", "func_args"],
        [
            ("count", ()),
            ("agg", ("count",)),
            ("agg", (["count"],)),
            ("agg", (["max", "count", "min"],)),
        ],
    )
    def test_groupby_dataframe_count(self, groupby, func, func_args):
        """Compare ``count`` called directly and through ``agg``."""
        pd_df = self.pd_flights().filter(self.filter_data)
        ed_df = self.ed_flights().filter(self.filter_data)

        pd_method = getattr(pd_df.groupby(groupby), func)
        expected = pd_method(*func_args)
        ed_method = getattr(ed_df.groupby(groupby), func)
        actual = ed_method(*func_args)

        assert_index_equal(expected.columns, actual.columns)
        assert_index_equal(expected.index, actual.index)
        assert_frame_equal(expected, actual)
        assert_series_equal(expected.dtypes, actual.dtypes)

    def test_groupby_dataframe_mad(self):
        """Compare labels and dtypes of ``mad``, alone and combined with ``min``."""
        pd_df = self.pd_flights().filter(self.filter_data + ["DestCountry"])
        ed_df = self.ed_flights().filter(self.filter_data + ["DestCountry"])

        # ``mad`` called directly on the groupby object.
        expected_mad = pd_df.groupby("DestCountry").mad()
        actual_mad = ed_df.groupby("DestCountry").mad()

        assert_index_equal(expected_mad.columns, actual_mad.columns)
        assert_index_equal(expected_mad.index, actual_mad.index)
        assert_series_equal(expected_mad.dtypes, actual_mad.dtypes)

        # ``mad`` requested together with ``min`` through ``aggregate``.
        expected_both = pd_df.groupby("DestCountry").aggregate(["min", "mad"])
        actual_both = ed_df.groupby("DestCountry").aggregate(["min", "mad"])

        assert_index_equal(expected_both.columns, actual_both.columns)
        assert_index_equal(expected_both.index, actual_both.index)
        assert_series_equal(expected_both.dtypes, actual_both.dtypes)

View File

@ -17,7 +17,7 @@
# File called _pytest for PyCharm compatibility
from eland.tests.common import TestData, assert_pandas_eland_frame_equal
from tests.common import TestData, assert_pandas_eland_frame_equal
class TestDataFrameHeadTail(TestData):

View File

@ -21,7 +21,7 @@ import numpy as np
import pandas as pd
from pandas.testing import assert_frame_equal
from eland.tests.common import TestData
from tests.common import TestData
class TestDataFrameHist(TestData):

View File

@ -19,8 +19,8 @@
from io import StringIO
import eland as ed
from eland.tests import ES_TEST_CLIENT
from eland.tests.common import TestData
from tests import ES_TEST_CLIENT
from tests.common import TestData
class TestDataFrameInfo(TestData):

View File

@ -21,7 +21,7 @@ import pytest
import eland as ed
from eland.query_compiler import QueryCompiler
from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
class TestDataFrameInit:

View File

@ -19,7 +19,7 @@
from pandas.testing import assert_index_equal
from eland.tests.common import TestData
from tests.common import TestData
class TestDataFrameKeys(TestData):

View File

@ -22,7 +22,7 @@ import pandas as pd
import pytest
from pandas.testing import assert_frame_equal, assert_series_equal
from eland.tests.common import TestData
from tests.common import TestData
class TestDataFrameMetrics(TestData):

View File

@ -19,7 +19,7 @@
from pandas.testing import assert_series_equal
from eland.tests.common import TestData
from tests.common import TestData
class TestDataFrameNUnique(TestData):

View File

@ -20,7 +20,7 @@
import pandas as pd
import eland as ed
from eland.tests.common import ES_TEST_CLIENT, TestData, assert_pandas_eland_frame_equal
from tests.common import ES_TEST_CLIENT, TestData, assert_pandas_eland_frame_equal
class TestDataFrameQuery(TestData):

View File

@ -21,7 +21,7 @@ import pandas as pd
import pytest
from eland.dataframe import DEFAULT_NUM_ROWS_DISPLAYED
from eland.tests.common import TestData, assert_pandas_eland_series_equal
from tests.common import TestData, assert_pandas_eland_series_equal
class TestDataFrameRepr(TestData):
@ -161,6 +161,9 @@ class TestDataFrameRepr(TestData):
ed_ecom_r = repr(ed_ecom[ed_ecom["currency"] == "USD"])
pd_ecom_r = repr(pd_ecom[pd_ecom["currency"] == "USD"])
print(ed_ecom_r)
print(pd_ecom_r)
assert ed_ecom_r == pd_ecom_r
"""

View File

@ -20,7 +20,7 @@ import pytest
from pandas.testing import assert_frame_equal
from eland import eland_to_pandas
from eland.tests.common import TestData
from tests.common import TestData
class TestDataFrameSample(TestData):

View File

@ -18,7 +18,7 @@
# File called _pytest for PyCharm compatibility
import numpy as np
from eland.tests.common import TestData, assert_pandas_eland_frame_equal
from tests.common import TestData, assert_pandas_eland_frame_equal
class TestDataFrameSelectDTypes(TestData):

View File

@ -17,7 +17,7 @@
# File called _pytest for PyCharm compatibility
from eland.tests.common import TestData
from tests.common import TestData
class TestDataFrameShape(TestData):

View File

@ -24,8 +24,8 @@ import pandas as pd
from pandas.testing import assert_frame_equal
import eland as ed
from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from eland.tests.common import ROOT_DIR, TestData
from tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from tests.common import ROOT_DIR, TestData
class TestDataFrameToCSV(TestData):

View File

@ -23,7 +23,7 @@ import pytest
import eland as ed
from eland.field_mappings import FieldMappings
from eland.tests.common import ES_TEST_CLIENT, TestData, assert_pandas_eland_frame_equal
from tests.common import ES_TEST_CLIENT, TestData, assert_pandas_eland_frame_equal
class TestDataFrameUtils(TestData):
@ -137,7 +137,7 @@ class TestDataFrameUtils(TestData):
ES_TEST_CLIENT.indices.delete(index=index_name)
def test_eland_to_pandas_performance(self):
def tests_to_pandas_performance(self):
# TODO quantify this
ed.eland_to_pandas(self.ed_flights(), show_progress=True)

View File

@ -22,7 +22,7 @@ import pytest
from elasticsearch.helpers import BulkIndexError
from eland import DataFrame, pandas_to_eland
from eland.tests.common import (
from tests.common import (
ES_TEST_CLIENT,
assert_frame_equal,
assert_pandas_eland_frame_equal,

View File

@ -19,8 +19,8 @@
import pytest
from eland.field_mappings import FieldMappings
from eland.tests import ECOMMERCE_INDEX_NAME, ES_TEST_CLIENT
from eland.tests.common import TestData
from tests import ECOMMERCE_INDEX_NAME, ES_TEST_CLIENT
from tests.common import TestData
class TestAggregatables(TestData):

View File

@ -19,7 +19,7 @@
from datetime import datetime
from eland.field_mappings import FieldMappings
from eland.tests.common import ES_TEST_CLIENT, TestData
from tests.common import ES_TEST_CLIENT, TestData
class TestDateTime(TestData):

View File

@ -19,8 +19,8 @@
import pytest
from eland.field_mappings import FieldMappings
from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from eland.tests.common import TestData
from tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from tests.common import TestData
class TestDisplayNames(TestData):

View File

@ -19,8 +19,8 @@
from pandas.testing import assert_series_equal
from eland.field_mappings import FieldMappings
from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from eland.tests.common import TestData
from tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from tests.common import TestData
class TestDTypes(TestData):

View File

@ -20,8 +20,8 @@ import pytest
from pandas.testing import assert_series_equal
from eland.field_mappings import FieldMappings
from eland.tests import FLIGHTS_INDEX_NAME, FLIGHTS_MAPPING
from eland.tests.common import ES_TEST_CLIENT, TestData
from tests import FLIGHTS_INDEX_NAME, FLIGHTS_MAPPING
from tests.common import ES_TEST_CLIENT, TestData
class TestFieldNamePDDType(TestData):

View File

@ -21,8 +21,8 @@ from pandas.testing import assert_index_equal
# File called _pytest for PyCharm compatibility
from eland.field_mappings import FieldMappings
from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from eland.tests.common import TestData
from tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from tests.common import TestData
class TestGetFieldNames(TestData):

View File

@ -19,7 +19,7 @@
import pytest
from eland.field_mappings import FieldMappings
from eland.tests.common import TestData
from tests.common import TestData
class TestMappingsWithType(TestData):

View File

@ -20,8 +20,8 @@
import numpy as np
from eland.field_mappings import FieldMappings
from eland.tests import ECOMMERCE_INDEX_NAME, ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from eland.tests.common import TestData
from tests import ECOMMERCE_INDEX_NAME, ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from tests.common import TestData
class TestMetricSourceFields(TestData):

View File

@ -18,8 +18,8 @@
# File called _pytest for PyCharm compatibility
from eland.field_mappings import FieldMappings
from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from eland.tests.common import TestData
from tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from tests.common import TestData
class TestRename(TestData):

View File

@ -21,8 +21,8 @@ from io import StringIO
import numpy as np
from eland.field_mappings import FieldMappings
from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from eland.tests.common import TestData
from tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from tests.common import TestData
class TestScriptedFields(TestData):

View File

@ -19,7 +19,7 @@ import numpy as np
import pytest
from eland.ml import MLModel
from eland.tests import ES_TEST_CLIENT, ES_VERSION
from tests import ES_TEST_CLIENT, ES_VERSION
try:
from sklearn import datasets

View File

@ -1,3 +1,3 @@
,state,account length,area code,phone number,international plan,voice mail plan,number vmail messages,total day minutes,total day calls,total day charge,total eve minutes,total eve calls,total eve charge,total night minutes,total night calls,total night charge,total intl minutes,total intl calls,total intl charge,customer service calls,churn
0,KS,128,415,382-4657,no,yes,25,265.1,110,45.07,197.4,99,16.78,244.7,91,11.01,10.0,3,2.7,1,0
,state,account length,area code,phone number,international plan,voice mail plan,number vmail messages,total day minutes,total day calls,total day charge,total eve minutes,total eve calls,total eve charge,total night minutes,total night calls,total night charge,total intl minutes,total intl calls,total intl charge,customer service calls,churn
0,KS,128,415,382-4657,no,yes,25,265.1,110,45.07,197.4,99,16.78,244.7,91,11.01,10.0,3,2.7,1,0
1,OH,107,415,371-7191,no,yes,26,161.6,123,27.47,195.5,103,16.62,254.4,103,11.45,13.7,3,3.7,1,0
1 state account length area code phone number international plan voice mail plan number vmail messages total day minutes total day calls total day charge total eve minutes total eve calls total eve charge total night minutes total night calls total night charge total intl minutes total intl calls total intl charge customer service calls churn
2 0 KS 128 415 382-4657 no yes 25 265.1 110 45.07 197.4 99 16.78 244.7 91 11.01 10.0 3 2.7 1 0
3 1 OH 107 415 371-7191 no yes 26 161.6 123 27.47 195.5 103 16.62 254.4 103 11.45 13.7 3 3.7 1 0

View File

@ -42,8 +42,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"2020-10-26 12:19:00.259395: read 10000 rows\n",
"2020-10-26 12:19:00.948930: read 13059 rows\n"
"2020-10-28 22:01:46.397163: read 10000 rows\n",
"2020-10-28 22:01:47.100938: read 13059 rows\n"
]
}
],
@ -77,6 +77,27 @@
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3e3e6e7371be43aabd4f9a2bb62ed737",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(HTML(value='Progress'), FloatProgress(value=0.0, max=2.0), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/html": [
@ -205,6 +226,7 @@
}
],
"source": [
"# NBVAL_IGNORE_OUTPUT\n",
"ed.csv_to_eland(\"./test_churn.csv\", es_client='localhost', es_dest_index='churn', es_refresh=True, index_col=0)"
]
},

View File

@ -20,7 +20,7 @@
import pytest
from matplotlib.testing.decorators import check_figures_equal
from eland.tests.common import TestData
from tests.common import TestData
@check_figures_equal(extensions=["png"])

View File

@ -18,7 +18,7 @@
# File called _pytest for PyCharm compatibility
from matplotlib.testing.decorators import check_figures_equal
from eland.tests.common import TestData
from tests.common import TestData
@check_figures_equal(extensions=["png"])

View File

@ -18,7 +18,7 @@
# File called _pytest for PyCharm compatibility
from eland.query import Query
from eland.tests.common import TestData
from tests.common import TestData
class TestQueryCopy(TestData):

View File

@ -20,7 +20,7 @@
import pytest
from eland.query_compiler import QueryCompiler
from eland.tests.common import TestData
from tests.common import TestData
class TestEsMatch(TestData):

View File

@ -19,7 +19,7 @@
import pandas as pd
from pandas.testing import assert_index_equal
from eland.tests.common import TestData
from tests.common import TestData
class TestGetFieldNames(TestData):

View File

@ -19,7 +19,7 @@
import numpy as np
import pytest
from eland.tests.common import TestData, assert_pandas_eland_series_equal
from tests.common import TestData, assert_pandas_eland_series_equal
class TestSeriesArithmetics(TestData):

View File

@ -17,7 +17,7 @@
import pandas as pd
from eland.tests.common import TestData, assert_series_equal
from tests.common import TestData, assert_series_equal
class TestSeriesDescribe(TestData):

View File

@ -21,7 +21,7 @@ import numpy as np
import pandas as pd
from eland.common import EMPTY_SERIES_DTYPE, build_pd_series
from eland.tests.common import assert_series_equal
from tests.common import assert_series_equal
def test_empty_series_dtypes():

View File

@ -17,7 +17,7 @@
# File called _pytest for PyCharm compatibility
from eland.tests.common import TestData
from tests.common import TestData
class TestEsMatch(TestData):

View File

@ -19,7 +19,7 @@
import pytest
from eland.tests.common import TestData, assert_pandas_eland_series_equal
from tests.common import TestData, assert_pandas_eland_series_equal
class TestSeriesFilter(TestData):

View File

@ -17,8 +17,8 @@
# File called _pytest for PyCharm compatibility
import eland as ed
from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from eland.tests.common import TestData, assert_pandas_eland_series_equal
from tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from tests.common import TestData, assert_pandas_eland_series_equal
class TestSeriesHeadTail(TestData):

View File

@ -22,7 +22,7 @@ import pandas as pd
import pytest
from pandas.testing import assert_frame_equal
from eland.tests.common import TestData
from tests.common import TestData
class TestSeriesFrameHist(TestData):

View File

@ -17,7 +17,7 @@
# File called _pytest for PyCharm compatability
from eland.tests.common import TestData
from tests.common import TestData
class TestSeriesInfoEs(TestData):

View File

@ -23,7 +23,7 @@ import numpy as np
import pandas as pd
import pytest
from eland.tests.common import TestData
from tests.common import TestData
class TestSeriesMetrics(TestData):

View File

@ -16,7 +16,7 @@
# under the License.
from eland import eland_to_pandas
from eland.tests.common import TestData, assert_pandas_eland_frame_equal
from tests.common import TestData, assert_pandas_eland_frame_equal
class TestSeriesNA(TestData):

View File

@ -17,8 +17,8 @@
# File called _pytest for PyCharm compatibility
import eland as ed
from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from eland.tests.common import TestData, assert_pandas_eland_series_equal
from tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from tests.common import TestData, assert_pandas_eland_series_equal
class TestSeriesName(TestData):

View File

@ -17,8 +17,8 @@
# File called _pytest for PyCharm compatibility
import eland as ed
from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from eland.tests.common import TestData, assert_pandas_eland_series_equal
from tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from tests.common import TestData, assert_pandas_eland_series_equal
class TestSeriesRename(TestData):

View File

@ -19,8 +19,8 @@
import pandas as pd
import eland as ed
from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from eland.tests.common import TestData
from tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from tests.common import TestData
class TestSeriesRepr(TestData):

View File

@ -17,8 +17,8 @@
# File called _pytest for PyCharm compatibility
import eland as ed
from eland.tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from eland.tests.common import TestData, assert_pandas_eland_series_equal
from tests import ES_TEST_CLIENT, FLIGHTS_INDEX_NAME
from tests.common import TestData, assert_pandas_eland_series_equal
class TestSeriesSample(TestData):

View File

@ -18,7 +18,7 @@
# File called _pytest for PyCharm compatibility
import pytest
from eland.tests.common import TestData, assert_pandas_eland_series_equal
from tests.common import TestData, assert_pandas_eland_series_equal
class TestSeriesArithmetics(TestData):

View File

@ -19,7 +19,7 @@
import pytest
from pandas.testing import assert_series_equal
from eland.tests.common import TestData
from tests.common import TestData
class TestSeriesValueCounts(TestData):

View File

@ -19,7 +19,7 @@ import pandas as pd
from elasticsearch import helpers
from eland.common import es_version
from eland.tests import (
from tests import (
ECOMMERCE_FILE_NAME,
ECOMMERCE_INDEX_NAME,
ECOMMERCE_MAPPING,