From 9bbe9bbb1c0f68433a2365401303f0603d8572ef Mon Sep 17 00:00:00 2001 From: Stephen Dodson Date: Mon, 25 Nov 2019 16:15:50 +0000 Subject: [PATCH] Fixing issue with addition for strings e.g. df['currency']+1 --- eland/series.py | 6 +- eland/tests/series/test_arithmetics_pytest.py | 64 ++++++++++++++++++- 2 files changed, 67 insertions(+), 3 deletions(-) diff --git a/eland/series.py b/eland/series.py index 0a3df97..5b69b3f 100644 --- a/eland/series.py +++ b/eland/series.py @@ -1009,7 +1009,7 @@ class Series(NDFrame): series.name = None return series - elif np.issubdtype(np.dtype(type(right)), np.number): # allow np types + elif np.issubdtype(np.dtype(type(right)), np.number) and np.issubdtype(self._dtype, np.number): new_field_name = "{0}_{1}_{2}".format(self.name, method_name, str(right).replace('.', '_')) # Compatible, so create new Series @@ -1021,6 +1021,7 @@ class Series(NDFrame): return series else: + # TODO - support limited ops on strings https://github.com/elastic/eland/issues/65 raise TypeError( "unsupported operand type(s) for '{}' {} '{}'".format(type(self), method_name, type(right)) ) @@ -1033,7 +1034,7 @@ class Series(NDFrame): if isinstance(left, Series): # if both are Series, revese args and call normal op method and remove 'r' from radd etc. return left._numeric_op(self, op_method_name) - elif np.issubdtype(np.dtype(type(left)), np.number): # allow np types + elif np.issubdtype(np.dtype(type(left)), np.number) and np.issubdtype(self._dtype, np.number): # Prefix new field name with 'f_' so it's a valid ES field name new_field_name = "f_{0}_{1}_{2}".format(str(left).replace('.', '_'), op_method_name, self.name) @@ -1046,6 +1047,7 @@ class Series(NDFrame): return series else: + # TODO - support limited ops on strings https://github.com/elastic/eland/issues/65 raise TypeError( "unsupported operand type(s) for '{}' {} '{}'".format(type(self), method_name, type(left)) ) diff --git a/eland/tests/series/test_arithmetics_pytest.py b/eland/tests/series/test_arithmetics_pytest.py index 7a50cc9..c3c0666 100644 --- a/eland/tests/series/test_arithmetics_pytest.py +++ b/eland/tests/series/test_arithmetics_pytest.py @@ -14,7 +14,7 @@ class TestSeriesArithmetics(TestData): # eland / pandas == error with pytest.raises(TypeError): - ed_df['total_quantity'] / pd_df['taxful_total_price'] + ed_series = ed_df['total_quantity'] / pd_df['taxful_total_price'] def test_ecommerce_series_basic_arithmetics(self): pd_df = self.pd_ecommerce().head(100) @@ -97,6 +97,10 @@ class TestSeriesArithmetics(TestData): pd_series = getattr(pd_df['currency'], op)(pd_df['total_quantity']) with pytest.raises(TypeError): ed_series = getattr(ed_df['currency'], op)(ed_df['total_quantity']) + with pytest.raises(TypeError): + pd_series = getattr(pd_df['currency'], op)(1) + with pytest.raises(TypeError): + ed_series = getattr(ed_df['currency'], op)(1) # int op str (throws) for op in non_string_numeric_ops: @@ -140,3 +144,61 @@ class TestSeriesArithmetics(TestData): pd_series = getattr(pd_df['taxful_total_price'], op)(int(6)) ed_series = getattr(ed_df['taxful_total_price'], op)(int(6)) assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True) + + def test_supported_series_dtypes_rops(self): + pd_df = self.pd_ecommerce().head(100) + ed_df = self.ed_ecommerce().head(100) + + # Test some specific operations that are and aren't supported + numeric_ops = ['__radd__', + '__rtruediv__', + '__rfloordiv__', + '__rpow__', + '__rmod__', + '__rmul__', + '__rsub__'] + + non_string_numeric_ops = ['__radd__', + '__rtruediv__', + '__rfloordiv__', + '__rpow__', + '__rmod__', + '__rsub__'] + # __rmul__ is supported for int * str in pandas + + # float op float + for op in numeric_ops: + pd_series = getattr(pd_df['taxful_total_price'], op)(pd_df['taxless_total_price']) + ed_series = getattr(ed_df['taxful_total_price'], op)(ed_df['taxless_total_price']) + assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True) + + # int op float + for op in numeric_ops: + pd_series = getattr(pd_df['total_quantity'], op)(pd_df['taxless_total_price']) + ed_series = getattr(ed_df['total_quantity'], op)(ed_df['taxless_total_price']) + assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True) + + # float op int + for op in numeric_ops: + pd_series = getattr(pd_df['taxful_total_price'], op)(pd_df['total_quantity']) + ed_series = getattr(ed_df['taxful_total_price'], op)(ed_df['total_quantity']) + assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True) + + # str op int (throws) + for op in non_string_numeric_ops: + print(op) + with pytest.raises(TypeError): + pd_series = getattr(pd_df['currency'], op)(pd_df['total_quantity']) + with pytest.raises(TypeError): + ed_series = getattr(ed_df['currency'], op)(ed_df['total_quantity']) + with pytest.raises(TypeError): + pd_series = getattr(pd_df['currency'], op)(10.0) + with pytest.raises(TypeError): + ed_series = getattr(ed_df['currency'], op)(10.0) + + # int op str (throws) + for op in non_string_numeric_ops: + with pytest.raises(TypeError): + pd_series = getattr(pd_df['total_quantity'], op)(pd_df['currency']) + with pytest.raises(TypeError): + ed_series = getattr(ed_df['total_quantity'], op)(ed_df['currency'])