Fixing issue with addition for strings

e.g. df['currency']+1
This commit is contained in:
Stephen Dodson 2019-11-25 16:15:50 +00:00
parent 85422e2023
commit 9bbe9bbb1c
2 changed files with 67 additions and 3 deletions

View File

@ -1009,7 +1009,7 @@ class Series(NDFrame):
series.name = None
return series
elif np.issubdtype(np.dtype(type(right)), np.number): # allow np types
elif np.issubdtype(np.dtype(type(right)), np.number) and np.issubdtype(self._dtype, np.number):
new_field_name = "{0}_{1}_{2}".format(self.name, method_name, str(right).replace('.', '_'))
# Compatible, so create new Series
@ -1021,6 +1021,7 @@ class Series(NDFrame):
return series
else:
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
raise TypeError(
"unsupported operand type(s) for '{}' {} '{}'".format(type(self), method_name, type(right))
)
@ -1033,7 +1034,7 @@ class Series(NDFrame):
if isinstance(left, Series):
# if both are Series, reverse args and call normal op method and remove 'r' from radd etc.
return left._numeric_op(self, op_method_name)
elif np.issubdtype(np.dtype(type(left)), np.number): # allow np types
elif np.issubdtype(np.dtype(type(left)), np.number) and np.issubdtype(self._dtype, np.number):
# Prefix new field name with 'f_' so it's a valid ES field name
new_field_name = "f_{0}_{1}_{2}".format(str(left).replace('.', '_'), op_method_name, self.name)
@ -1046,6 +1047,7 @@ class Series(NDFrame):
return series
else:
# TODO - support limited ops on strings https://github.com/elastic/eland/issues/65
raise TypeError(
"unsupported operand type(s) for '{}' {} '{}'".format(type(self), method_name, type(left))
)

View File

@ -14,7 +14,7 @@ class TestSeriesArithmetics(TestData):
# eland / pandas == error
with pytest.raises(TypeError):
ed_df['total_quantity'] / pd_df['taxful_total_price']
ed_series = ed_df['total_quantity'] / pd_df['taxful_total_price']
def test_ecommerce_series_basic_arithmetics(self):
pd_df = self.pd_ecommerce().head(100)
@ -97,6 +97,10 @@ class TestSeriesArithmetics(TestData):
pd_series = getattr(pd_df['currency'], op)(pd_df['total_quantity'])
with pytest.raises(TypeError):
ed_series = getattr(ed_df['currency'], op)(ed_df['total_quantity'])
with pytest.raises(TypeError):
pd_series = getattr(pd_df['currency'], op)(1)
with pytest.raises(TypeError):
ed_series = getattr(ed_df['currency'], op)(1)
# int op str (throws)
for op in non_string_numeric_ops:
@ -140,3 +144,61 @@ class TestSeriesArithmetics(TestData):
pd_series = getattr(pd_df['taxful_total_price'], op)(int(6))
ed_series = getattr(ed_df['taxful_total_price'], op)(int(6))
assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True)
def test_supported_series_dtypes_rops(self):
    """Compare reflected arithmetic dunders between pandas and eland Series.

    Works on the first 100 rows of the ecommerce data set, loaded once as a
    pandas frame and once as an eland frame.

    * For numeric column pairs every reflected op is called on both frames
      and the two results must be equal (with reduced float precision).
    * For combinations that involve the 'currency' column every reflected
      op except ``__rmul__`` must raise ``TypeError`` in both libraries.
    """
    pd_df = self.pd_ecommerce().head(100)
    ed_df = self.ed_ecommerce().head(100)

    # Test some specific operations that are and aren't supported
    numeric_ops = ['__radd__',
                   '__rtruediv__',
                   '__rfloordiv__',
                   '__rpow__',
                   '__rmod__',
                   '__rmul__',
                   '__rsub__']

    # __rmul__ is supported for int * str in pandas, so it is the only op
    # left out of the list of ops that are expected to raise.
    non_string_numeric_ops = [op for op in numeric_ops if op != '__rmul__']

    # (column the dunder is called on, column passed as its argument)
    numeric_column_pairs = [
        ('taxful_total_price', 'taxless_total_price'),  # float op float
        ('total_quantity', 'taxless_total_price'),  # int op float
        ('taxful_total_price', 'total_quantity'),  # float op int
    ]
    for receiver, argument in numeric_column_pairs:
        for op in numeric_ops:
            pd_series = getattr(pd_df[receiver], op)(pd_df[argument])
            ed_series = getattr(ed_df[receiver], op)(ed_df[argument])
            assert_pandas_eland_series_equal(pd_series, ed_series, check_less_precise=True)

    # str op int (throws)
    for op in non_string_numeric_ops:
        # Only the raised exception matters here, so results are not kept.
        with pytest.raises(TypeError):
            getattr(pd_df['currency'], op)(pd_df['total_quantity'])
        with pytest.raises(TypeError):
            getattr(ed_df['currency'], op)(ed_df['total_quantity'])
        # Same check with a scalar float instead of a Series
        with pytest.raises(TypeError):
            getattr(pd_df['currency'], op)(10.0)
        with pytest.raises(TypeError):
            getattr(ed_df['currency'], op)(10.0)

    # int op str (throws)
    for op in non_string_numeric_ops:
        with pytest.raises(TypeError):
            getattr(pd_df['total_quantity'], op)(pd_df['currency'])
        with pytest.raises(TypeError):
            getattr(ed_df['total_quantity'], op)(ed_df['currency'])