mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Merge pull request #48 from stevedodson/master
Improve to_string/to_html/__repr__/_repr_html_ tests
This commit is contained in:
commit
be4055093b
@ -4,7 +4,7 @@ import numpy as np
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import eland as ed
|
import eland as ed
|
||||||
|
|
||||||
# Fix console sizxe for consistent test results
|
# Fix console size for consistent test results
|
||||||
pd.set_option('display.max_rows', 10)
|
pd.set_option('display.max_rows', 10)
|
||||||
pd.set_option('display.max_columns', 5)
|
pd.set_option('display.max_columns', 5)
|
||||||
pd.set_option('display.width', 100)
|
pd.set_option('display.width', 100)
|
||||||
|
@ -19,6 +19,15 @@ from eland import NDFrame
|
|||||||
from eland import Series
|
from eland import Series
|
||||||
from eland.filter import BooleanFilter, ScriptFilter
|
from eland.filter import BooleanFilter, ScriptFilter
|
||||||
|
|
||||||
|
# Default number of rows displayed (different to pandas where ALL could be displayed)
|
||||||
|
DEFAULT_NUM_ROWS_DISPLAYED = 60
|
||||||
|
|
||||||
|
def docstring_parameter(*sub):
|
||||||
|
def dec(obj):
|
||||||
|
obj.__doc__ = obj.__doc__.format(*sub)
|
||||||
|
return obj
|
||||||
|
return dec
|
||||||
|
|
||||||
|
|
||||||
class DataFrame(NDFrame):
|
class DataFrame(NDFrame):
|
||||||
"""
|
"""
|
||||||
@ -275,7 +284,7 @@ class DataFrame(NDFrame):
|
|||||||
|
|
||||||
def _repr_html_(self):
|
def _repr_html_(self):
|
||||||
"""
|
"""
|
||||||
From pandas
|
From pandas - this is called by notebooks
|
||||||
"""
|
"""
|
||||||
if self._info_repr():
|
if self._info_repr():
|
||||||
buf = StringIO("")
|
buf = StringIO("")
|
||||||
@ -288,10 +297,15 @@ class DataFrame(NDFrame):
|
|||||||
if pd.get_option("display.notebook_repr_html"):
|
if pd.get_option("display.notebook_repr_html"):
|
||||||
max_rows = pd.get_option("display.max_rows")
|
max_rows = pd.get_option("display.max_rows")
|
||||||
max_cols = pd.get_option("display.max_columns")
|
max_cols = pd.get_option("display.max_columns")
|
||||||
|
min_rows = pd.get_option("display.min_rows")
|
||||||
show_dimensions = pd.get_option("display.show_dimensions")
|
show_dimensions = pd.get_option("display.show_dimensions")
|
||||||
|
|
||||||
|
if len(self) > max_rows:
|
||||||
|
max_rows = min_rows
|
||||||
|
|
||||||
return self.to_html(max_rows=max_rows, max_cols=max_cols,
|
return self.to_html(max_rows=max_rows, max_cols=max_cols,
|
||||||
show_dimensions=show_dimensions, notebook=True)
|
show_dimensions=show_dimensions, notebook=True,
|
||||||
|
bold_rows=False) # set for consistency with pandas output
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -532,6 +546,7 @@ class DataFrame(NDFrame):
|
|||||||
|
|
||||||
fmt.buffer_put_lines(buf, lines)
|
fmt.buffer_put_lines(buf, lines)
|
||||||
|
|
||||||
|
@docstring_parameter(DEFAULT_NUM_ROWS_DISPLAYED)
|
||||||
def to_html(self, buf=None, columns=None, col_space=None, header=True,
|
def to_html(self, buf=None, columns=None, col_space=None, header=True,
|
||||||
index=True, na_rep='NaN', formatters=None, float_format=None,
|
index=True, na_rep='NaN', formatters=None, float_format=None,
|
||||||
sparsify=None, index_names=True, justify=None, max_rows=None,
|
sparsify=None, index_names=True, justify=None, max_rows=None,
|
||||||
@ -541,15 +556,29 @@ class DataFrame(NDFrame):
|
|||||||
"""
|
"""
|
||||||
Render Elasticsearch data as an HTML table.
|
Render Elasticsearch data as an HTML table.
|
||||||
|
|
||||||
|
Follows pandas implementation except when ``max_rows=None``. In this scenario, we set ``max_rows={0}`` to avoid
|
||||||
|
accidentally dumping an entire index. This can be overridden by explicitly setting ``max_rows``.
|
||||||
|
|
||||||
See Also
|
See Also
|
||||||
--------
|
--------
|
||||||
:pandas_api_docs:`to_html` for argument details.
|
:pandas_api_docs:`to_html` for argument details.
|
||||||
"""
|
"""
|
||||||
if max_rows is None:
|
# In pandas, calling 'to_html' without max_rows set will dump ALL rows - we avoid this
|
||||||
|
# by limiting rows by default.
|
||||||
|
num_rows = len(self) # avoid multiple calls
|
||||||
|
if num_rows <= DEFAULT_NUM_ROWS_DISPLAYED:
|
||||||
|
if max_rows is None:
|
||||||
|
max_rows = num_rows
|
||||||
|
else:
|
||||||
|
max_rows = min(num_rows, max_rows)
|
||||||
|
elif max_rows is None:
|
||||||
warnings.warn("DataFrame.to_string called without max_rows set "
|
warnings.warn("DataFrame.to_string called without max_rows set "
|
||||||
"- this will return entire index results. "
|
"- this will return entire index results. "
|
||||||
"Setting max_rows=60, overwrite if different behaviour is required.")
|
"Setting max_rows={default}"
|
||||||
max_rows = 60
|
" overwrite if different behaviour is required."
|
||||||
|
.format(default=DEFAULT_NUM_ROWS_DISPLAYED),
|
||||||
|
UserWarning)
|
||||||
|
max_rows = DEFAULT_NUM_ROWS_DISPLAYED
|
||||||
|
|
||||||
# Create a slightly bigger dataframe than display
|
# Create a slightly bigger dataframe than display
|
||||||
df = self._build_repr_df(max_rows + 1, max_cols)
|
df = self._build_repr_df(max_rows + 1, max_cols)
|
||||||
@ -569,13 +598,16 @@ class DataFrame(NDFrame):
|
|||||||
# Our fake dataframe has incorrect number of rows (max_rows*2+1) - write out
|
# Our fake dataframe has incorrect number of rows (max_rows*2+1) - write out
|
||||||
# the correct number of rows
|
# the correct number of rows
|
||||||
if show_dimensions:
|
if show_dimensions:
|
||||||
_buf.write("\n<p>{nrows} rows x {ncols} columns</p>"
|
# TODO - this results in different output to pandas
|
||||||
|
# TODO - the 'x' character is different and this gets added after the </div>
|
||||||
|
_buf.write("\n<p>{nrows} rows x {ncols} columns</p>"
|
||||||
.format(nrows=len(self.index), ncols=len(self.columns)))
|
.format(nrows=len(self.index), ncols=len(self.columns)))
|
||||||
|
|
||||||
if buf is None:
|
if buf is None:
|
||||||
result = _buf.getvalue()
|
result = _buf.getvalue()
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
@docstring_parameter(DEFAULT_NUM_ROWS_DISPLAYED)
|
||||||
def to_string(self, buf=None, columns=None, col_space=None, header=True,
|
def to_string(self, buf=None, columns=None, col_space=None, header=True,
|
||||||
index=True, na_rep='NaN', formatters=None, float_format=None,
|
index=True, na_rep='NaN', formatters=None, float_format=None,
|
||||||
sparsify=None, index_names=True, justify=None,
|
sparsify=None, index_names=True, justify=None,
|
||||||
@ -584,17 +616,29 @@ class DataFrame(NDFrame):
|
|||||||
"""
|
"""
|
||||||
Render a DataFrame to a console-friendly tabular output.
|
Render a DataFrame to a console-friendly tabular output.
|
||||||
|
|
||||||
Follows pandas implementation except we set max_rows default to avoid careless extraction of entire index.
|
Follows pandas implementation except when ``max_rows=None``. In this scenario, we set ``max_rows={0}`` to avoid
|
||||||
|
accidentally dumping an entire index. This can be overridden by explicitly setting ``max_rows``.
|
||||||
|
|
||||||
See Also
|
See Also
|
||||||
--------
|
--------
|
||||||
:pandas_api_docs:`to_string` for argument details.
|
:pandas_api_docs:`to_string` for argument details.
|
||||||
"""
|
"""
|
||||||
if max_rows is None:
|
# In pandas calling 'to_string' without max_rows set, will dump ALL rows - we avoid this
|
||||||
|
# by limiting rows by default.
|
||||||
|
num_rows = len(self) # avoid multiple calls
|
||||||
|
if num_rows <= DEFAULT_NUM_ROWS_DISPLAYED:
|
||||||
|
if max_rows is None:
|
||||||
|
max_rows = num_rows
|
||||||
|
else:
|
||||||
|
max_rows = min(num_rows, max_rows)
|
||||||
|
elif max_rows is None:
|
||||||
warnings.warn("DataFrame.to_string called without max_rows set "
|
warnings.warn("DataFrame.to_string called without max_rows set "
|
||||||
"- this will return entire index results. "
|
"- this will return entire index results. "
|
||||||
"Setting max_rows=60, overwrite if different behaviour is required.")
|
"Setting max_rows={default}"
|
||||||
max_rows = 60
|
" overwrite if different behaviour is required."
|
||||||
|
.format(default=DEFAULT_NUM_ROWS_DISPLAYED),
|
||||||
|
UserWarning)
|
||||||
|
max_rows = DEFAULT_NUM_ROWS_DISPLAYED
|
||||||
|
|
||||||
# Create a slightly bigger dataframe than display
|
# Create a slightly bigger dataframe than display
|
||||||
df = self._build_repr_df(max_rows + 1, max_cols)
|
df = self._build_repr_df(max_rows + 1, max_cols)
|
||||||
|
@ -2,57 +2,158 @@
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
from eland.tests.common import TestData
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
|
from eland.dataframe import DEFAULT_NUM_ROWS_DISPLAYED
|
||||||
|
|
||||||
|
|
||||||
class TestDataFrameRepr(TestData):
|
class TestDataFrameRepr(TestData):
|
||||||
|
|
||||||
def test_head_101_to_string(self):
|
@classmethod
|
||||||
ed_flights = self.ed_flights()
|
def setup_class(cls):
|
||||||
pd_flights = self.pd_flights()
|
# conftest.py changes this default - restore to original setting
|
||||||
|
pd.set_option('display.max_rows', 60)
|
||||||
|
|
||||||
ed_head_101 = ed_flights.head(101)
|
"""
|
||||||
pd_head_101 = pd_flights.head(101)
|
to_string
|
||||||
|
"""
|
||||||
|
def test_num_rows_to_string(self):
|
||||||
|
# check setup works
|
||||||
|
assert pd.get_option('display.max_rows') == 60
|
||||||
|
|
||||||
# This sets max_rows=60 by default (but throws userwarning)
|
# Test eland.DataFrame.to_string vs pandas.DataFrame.to_string
|
||||||
|
# In pandas calling 'to_string' without max_rows set, will dump ALL rows
|
||||||
|
|
||||||
|
# Test n-1, n, n+1 for edge cases
|
||||||
|
self.num_rows_to_string(DEFAULT_NUM_ROWS_DISPLAYED-1)
|
||||||
|
self.num_rows_to_string(DEFAULT_NUM_ROWS_DISPLAYED)
|
||||||
with pytest.warns(UserWarning):
|
with pytest.warns(UserWarning):
|
||||||
ed_head_101_str = ed_head_101.to_string()
|
# UserWarning displayed by eland here (compare to pandas with max_rows set)
|
||||||
pd_head_101_str = pd_head_101.to_string(max_rows=60)
|
self.num_rows_to_string(DEFAULT_NUM_ROWS_DISPLAYED+1, None, DEFAULT_NUM_ROWS_DISPLAYED)
|
||||||
|
|
||||||
assert pd_head_101_str == ed_head_101_str
|
# Test for where max_rows lt or gt num_rows
|
||||||
|
self.num_rows_to_string(10, 5, 5)
|
||||||
|
self.num_rows_to_string(100, 200, 200)
|
||||||
|
|
||||||
def test_head_11_to_string2(self):
|
def num_rows_to_string(self, rows, max_rows_eland=None, max_rows_pandas=None):
|
||||||
ed_flights = self.ed_flights()
|
ed_flights = self.ed_flights()
|
||||||
pd_flights = self.pd_flights()
|
pd_flights = self.pd_flights()
|
||||||
|
|
||||||
ed_head_11 = ed_flights.head(11)
|
ed_head = ed_flights.head(rows)
|
||||||
pd_head_11 = pd_flights.head(11)
|
pd_head = pd_flights.head(rows)
|
||||||
|
|
||||||
ed_head_11_str = ed_head_11.to_string(max_rows=60)
|
ed_head_str = ed_head.to_string(max_rows=max_rows_eland)
|
||||||
pd_head_11_str = pd_head_11.to_string(max_rows=60)
|
pd_head_str = pd_head.to_string(max_rows=max_rows_pandas)
|
||||||
|
|
||||||
assert pd_head_11_str == ed_head_11_str
|
#print(ed_head_str)
|
||||||
|
#print(pd_head_str)
|
||||||
|
|
||||||
def test_less_than_max_rows_to_string(self):
|
assert pd_head_str == ed_head_str
|
||||||
|
|
||||||
|
"""
|
||||||
|
repr
|
||||||
|
"""
|
||||||
|
def test_num_rows_repr(self):
|
||||||
ed_flights = self.ed_flights()
|
ed_flights = self.ed_flights()
|
||||||
pd_flights = self.pd_flights()
|
pd_flights = self.pd_flights()
|
||||||
|
|
||||||
ed_less_than_max = ed_flights[ed_flights['AvgTicketPrice']>1190]
|
self.num_rows_repr(pd.get_option('display.max_rows')-1, pd.get_option('display.max_rows')-1)
|
||||||
pd_less_than_max = pd_flights[pd_flights['AvgTicketPrice']>1190]
|
self.num_rows_repr(pd.get_option('display.max_rows'), pd.get_option('display.max_rows'))
|
||||||
|
self.num_rows_repr(pd.get_option('display.max_rows')+1, pd.get_option('display.min_rows'))
|
||||||
|
|
||||||
ed_less_than_max_str = ed_less_than_max.to_string()
|
def num_rows_repr(self, rows, num_rows_printed):
|
||||||
pd_less_than_max_str = pd_less_than_max.to_string()
|
ed_flights = self.ed_flights()
|
||||||
|
pd_flights = self.pd_flights()
|
||||||
|
|
||||||
assert pd_less_than_max_str == ed_less_than_max_str
|
ed_head = ed_flights.head(rows)
|
||||||
|
pd_head = pd_flights.head(rows)
|
||||||
|
|
||||||
def test_repr(self):
|
ed_head_str = repr(ed_head)
|
||||||
ed_ecommerce = self.ed_ecommerce()
|
pd_head_str = repr(pd_head)
|
||||||
pd_ecommerce = self.pd_ecommerce()
|
|
||||||
|
|
||||||
ed_head_18 = ed_ecommerce.head(18)
|
if num_rows_printed < rows:
|
||||||
pd_head_18 = pd_ecommerce.head(18)
|
# add 1 for ellipsis
|
||||||
|
num_rows_printed = num_rows_printed + 1
|
||||||
|
|
||||||
ed_head_18_repr = repr(ed_head_18)
|
# number of rows is num_rows_printed + 3 (header, summary)
|
||||||
pd_head_18_repr = repr(pd_head_18)
|
assert (num_rows_printed+3) == len(ed_head_str.splitlines())
|
||||||
|
|
||||||
assert ed_head_18_repr == pd_head_18_repr
|
assert pd_head_str == ed_head_str
|
||||||
|
|
||||||
|
"""
|
||||||
|
to_html
|
||||||
|
"""
|
||||||
|
def test_num_rows_to_html(self):
|
||||||
|
# check setup works
|
||||||
|
assert pd.get_option('display.max_rows') == 60
|
||||||
|
|
||||||
|
# Test eland.DataFrame.to_html vs pandas.DataFrame.to_html
|
||||||
|
# In pandas, calling 'to_html' without max_rows set will dump ALL rows
|
||||||
|
|
||||||
|
# Test n-1, n, n+1 for edge cases
|
||||||
|
self.num_rows_to_html(DEFAULT_NUM_ROWS_DISPLAYED-1)
|
||||||
|
self.num_rows_to_html(DEFAULT_NUM_ROWS_DISPLAYED)
|
||||||
|
with pytest.warns(UserWarning):
|
||||||
|
# UserWarning displayed by eland here
|
||||||
|
self.num_rows_to_html(DEFAULT_NUM_ROWS_DISPLAYED+1, None, DEFAULT_NUM_ROWS_DISPLAYED)
|
||||||
|
|
||||||
|
# Test for where max_rows lt or gt num_rows
|
||||||
|
self.num_rows_to_html(10, 5, 5)
|
||||||
|
self.num_rows_to_html(100, 200, 200)
|
||||||
|
|
||||||
|
def num_rows_to_html(self, rows, max_rows_eland=None, max_rows_pandas=None):
|
||||||
|
ed_flights = self.ed_flights()
|
||||||
|
pd_flights = self.pd_flights()
|
||||||
|
|
||||||
|
ed_head = ed_flights.head(rows)
|
||||||
|
pd_head = pd_flights.head(rows)
|
||||||
|
|
||||||
|
ed_head_str = ed_head.to_html(max_rows=max_rows_eland)
|
||||||
|
pd_head_str = pd_head.to_html(max_rows=max_rows_pandas)
|
||||||
|
|
||||||
|
#print(ed_head_str)
|
||||||
|
#print(pd_head_str)
|
||||||
|
|
||||||
|
assert pd_head_str == ed_head_str
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
_repr_html_
|
||||||
|
"""
|
||||||
|
def test_num_rows_repr_html(self):
|
||||||
|
# check setup works
|
||||||
|
assert pd.get_option('display.max_rows') == 60
|
||||||
|
|
||||||
|
show_dimensions = pd.get_option('display.show_dimensions')
|
||||||
|
|
||||||
|
# TODO - there is a bug in 'show_dimensions' as it gets added after the last </div>
|
||||||
|
# For now test without this
|
||||||
|
pd.set_option('display.show_dimensions', False)
|
||||||
|
|
||||||
|
# Test eland.DataFrame._repr_html_ vs pandas.DataFrame._repr_html_
|
||||||
|
# Both truncate their output according to the pandas 'display.max_rows' option
|
||||||
|
|
||||||
|
# Test n-1, n, n+1 for edge cases
|
||||||
|
self.num_rows_repr_html(pd.get_option('display.max_rows')-1)
|
||||||
|
self.num_rows_repr_html(pd.get_option('display.max_rows'))
|
||||||
|
self.num_rows_repr_html(pd.get_option('display.max_rows')+1, pd.get_option('display.max_rows'))
|
||||||
|
|
||||||
|
# Restore default
|
||||||
|
pd.set_option('display.show_dimensions', show_dimensions)
|
||||||
|
|
||||||
|
def num_rows_repr_html(self, rows, max_rows=None):
|
||||||
|
ed_flights = self.ed_flights()
|
||||||
|
pd_flights = self.pd_flights()
|
||||||
|
|
||||||
|
ed_head = ed_flights.head(rows)
|
||||||
|
pd_head = pd_flights.head(rows)
|
||||||
|
|
||||||
|
ed_head_str = ed_head._repr_html_()
|
||||||
|
pd_head_str = pd_head._repr_html_()
|
||||||
|
|
||||||
|
#print(ed_head_str)
|
||||||
|
#print(pd_head_str)
|
||||||
|
|
||||||
|
assert pd_head_str == ed_head_str
|
||||||
|
Loading…
x
Reference in New Issue
Block a user