craftbeerpi4-pione/venv/lib/python3.8/site-packages/pandas/tests/indexing/test_coercion.py
2021-01-30 22:29:33 +01:00

1110 lines
37 KiB
Python

from datetime import timedelta
import itertools
from typing import Dict, List
import numpy as np
import pytest
import pandas.compat as compat
import pandas as pd
import pandas._testing as tm
###############################################################
# Index / Series common tests which may trigger dtype coercions
###############################################################
@pytest.fixture(autouse=True, scope="class")
def check_comprehensiveness(request):
# Iterate over combination of dtype, method and klass
# and ensure that each are contained within a collected test
cls = request.cls
combos = itertools.product(cls.klasses, cls.dtypes, [cls.method])
def has_test(combo):
klass, dtype, method = combo
cls_funcs = request.node.session.items
return any(
klass in x.name and dtype in x.name and method in x.name for x in cls_funcs
)
for combo in combos:
if not has_test(combo):
raise AssertionError(f"test method is not defined: {cls.__name__}, {combo}")
yield
class CoercionBase:
klasses = ["index", "series"]
dtypes = [
"object",
"int64",
"float64",
"complex128",
"bool",
"datetime64",
"datetime64tz",
"timedelta64",
"period",
]
@property
def method(self):
raise NotImplementedError(self)
def _assert(self, left, right, dtype):
# explicitly check dtype to avoid any unexpected result
if isinstance(left, pd.Series):
tm.assert_series_equal(left, right)
elif isinstance(left, pd.Index):
tm.assert_index_equal(left, right)
else:
raise NotImplementedError
assert left.dtype == dtype
assert right.dtype == dtype
class TestSetitemCoercion(CoercionBase):
method = "setitem"
def _assert_setitem_series_conversion(
self, original_series, loc_value, expected_series, expected_dtype
):
""" test series value's coercion triggered by assignment """
temp = original_series.copy()
temp[1] = loc_value
tm.assert_series_equal(temp, expected_series)
# check dtype explicitly for sure
assert temp.dtype == expected_dtype
# .loc works different rule, temporary disable
# temp = original_series.copy()
# temp.loc[1] = loc_value
# tm.assert_series_equal(temp, expected_series)
@pytest.mark.parametrize(
"val,exp_dtype", [(1, object), (1.1, object), (1 + 1j, object), (True, object)],
)
def test_setitem_series_object(self, val, exp_dtype):
obj = pd.Series(list("abcd"))
assert obj.dtype == object
exp = pd.Series(["a", val, "c", "d"])
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
@pytest.mark.parametrize(
"val,exp_dtype",
[(1, np.int64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)],
)
def test_setitem_series_int64(self, val, exp_dtype, request):
obj = pd.Series([1, 2, 3, 4])
assert obj.dtype == np.int64
if exp_dtype is np.float64:
exp = pd.Series([1, 1, 3, 4])
self._assert_setitem_series_conversion(obj, 1.1, exp, np.int64)
mark = pytest.mark.xfail(reason="GH12747 The result must be float")
request.node.add_marker(mark)
exp = pd.Series([1, val, 3, 4])
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
@pytest.mark.parametrize(
"val,exp_dtype", [(np.int32(1), np.int8), (np.int16(2 ** 9), np.int16)]
)
def test_setitem_series_int8(self, val, exp_dtype, request):
obj = pd.Series([1, 2, 3, 4], dtype=np.int8)
assert obj.dtype == np.int8
if exp_dtype is np.int16:
exp = pd.Series([1, 0, 3, 4], dtype=np.int8)
self._assert_setitem_series_conversion(obj, val, exp, np.int8)
mark = pytest.mark.xfail(
reason="BUG: it must be Series([1, 1, 3, 4], dtype=np.int16"
)
request.node.add_marker(mark)
exp = pd.Series([1, val, 3, 4], dtype=np.int8)
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
@pytest.mark.parametrize(
"val,exp_dtype",
[(1, np.float64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)],
)
def test_setitem_series_float64(self, val, exp_dtype):
obj = pd.Series([1.1, 2.2, 3.3, 4.4])
assert obj.dtype == np.float64
exp = pd.Series([1.1, val, 3.3, 4.4])
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
@pytest.mark.parametrize(
"val,exp_dtype",
[
(1, np.complex128),
(1.1, np.complex128),
(1 + 1j, np.complex128),
(True, object),
],
)
def test_setitem_series_complex128(self, val, exp_dtype):
obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j])
assert obj.dtype == np.complex128
exp = pd.Series([1 + 1j, val, 3 + 3j, 4 + 4j])
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
@pytest.mark.parametrize(
"val,exp_dtype",
[
(1, np.int64),
(3, np.int64),
(1.1, np.float64),
(1 + 1j, np.complex128),
(True, np.bool_),
],
)
def test_setitem_series_bool(self, val, exp_dtype, request):
obj = pd.Series([True, False, True, False])
assert obj.dtype == np.bool_
mark = None
if exp_dtype is np.int64:
exp = pd.Series([True, True, True, False])
self._assert_setitem_series_conversion(obj, val, exp, np.bool_)
mark = pytest.mark.xfail(reason="TODO_GH12747 The result must be int")
elif exp_dtype is np.float64:
exp = pd.Series([True, True, True, False])
self._assert_setitem_series_conversion(obj, val, exp, np.bool_)
mark = pytest.mark.xfail(reason="TODO_GH12747 The result must be float")
elif exp_dtype is np.complex128:
exp = pd.Series([True, True, True, False])
self._assert_setitem_series_conversion(obj, val, exp, np.bool_)
mark = pytest.mark.xfail(reason="TODO_GH12747 The result must be complex")
if mark is not None:
request.node.add_marker(mark)
exp = pd.Series([True, val, True, False])
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
@pytest.mark.parametrize(
"val,exp_dtype",
[(pd.Timestamp("2012-01-01"), "datetime64[ns]"), (1, object), ("x", object)],
)
def test_setitem_series_datetime64(self, val, exp_dtype):
obj = pd.Series(
[
pd.Timestamp("2011-01-01"),
pd.Timestamp("2011-01-02"),
pd.Timestamp("2011-01-03"),
pd.Timestamp("2011-01-04"),
]
)
assert obj.dtype == "datetime64[ns]"
exp = pd.Series(
[
pd.Timestamp("2011-01-01"),
val,
pd.Timestamp("2011-01-03"),
pd.Timestamp("2011-01-04"),
]
)
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
@pytest.mark.parametrize(
"val,exp_dtype",
[
(pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"),
(pd.Timestamp("2012-01-01", tz="US/Pacific"), object),
(pd.Timestamp("2012-01-01"), object),
(1, object),
],
)
def test_setitem_series_datetime64tz(self, val, exp_dtype):
tz = "US/Eastern"
obj = pd.Series(
[
pd.Timestamp("2011-01-01", tz=tz),
pd.Timestamp("2011-01-02", tz=tz),
pd.Timestamp("2011-01-03", tz=tz),
pd.Timestamp("2011-01-04", tz=tz),
]
)
assert obj.dtype == "datetime64[ns, US/Eastern]"
exp = pd.Series(
[
pd.Timestamp("2011-01-01", tz=tz),
val,
pd.Timestamp("2011-01-03", tz=tz),
pd.Timestamp("2011-01-04", tz=tz),
]
)
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
@pytest.mark.parametrize(
"val,exp_dtype",
[(pd.Timedelta("12 day"), "timedelta64[ns]"), (1, object), ("x", object)],
)
def test_setitem_series_timedelta64(self, val, exp_dtype):
obj = pd.Series(
[
pd.Timedelta("1 day"),
pd.Timedelta("2 day"),
pd.Timedelta("3 day"),
pd.Timedelta("4 day"),
]
)
assert obj.dtype == "timedelta64[ns]"
exp = pd.Series(
[pd.Timedelta("1 day"), val, pd.Timedelta("3 day"), pd.Timedelta("4 day")]
)
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
def _assert_setitem_index_conversion(
self, original_series, loc_key, expected_index, expected_dtype
):
""" test index's coercion triggered by assign key """
temp = original_series.copy()
temp[loc_key] = 5
exp = pd.Series([1, 2, 3, 4, 5], index=expected_index)
tm.assert_series_equal(temp, exp)
# check dtype explicitly for sure
assert temp.index.dtype == expected_dtype
temp = original_series.copy()
temp.loc[loc_key] = 5
exp = pd.Series([1, 2, 3, 4, 5], index=expected_index)
tm.assert_series_equal(temp, exp)
# check dtype explicitly for sure
assert temp.index.dtype == expected_dtype
@pytest.mark.parametrize(
"val,exp_dtype", [("x", object), (5, IndexError), (1.1, object)]
)
def test_setitem_index_object(self, val, exp_dtype):
obj = pd.Series([1, 2, 3, 4], index=list("abcd"))
assert obj.index.dtype == object
if exp_dtype is IndexError:
temp = obj.copy()
msg = "index 5 is out of bounds for axis 0 with size 4"
with pytest.raises(exp_dtype, match=msg):
temp[5] = 5
else:
exp_index = pd.Index(list("abcd") + [val])
self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
@pytest.mark.parametrize(
"val,exp_dtype", [(5, np.int64), (1.1, np.float64), ("x", object)]
)
def test_setitem_index_int64(self, val, exp_dtype):
obj = pd.Series([1, 2, 3, 4])
assert obj.index.dtype == np.int64
exp_index = pd.Index([0, 1, 2, 3, val])
self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
@pytest.mark.parametrize(
"val,exp_dtype", [(5, IndexError), (5.1, np.float64), ("x", object)]
)
def test_setitem_index_float64(self, val, exp_dtype, request):
obj = pd.Series([1, 2, 3, 4], index=[1.1, 2.1, 3.1, 4.1])
assert obj.index.dtype == np.float64
if exp_dtype is IndexError:
# float + int -> int
temp = obj.copy()
with pytest.raises(exp_dtype):
temp[5] = 5
mark = pytest.mark.xfail(reason="TODO_GH12747 The result must be float")
request.node.add_marker(mark)
exp_index = pd.Index([1.1, 2.1, 3.1, 4.1, val])
self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
def test_setitem_series_period(self):
pytest.xfail("Test not implemented")
def test_setitem_index_complex128(self):
pytest.xfail("Test not implemented")
def test_setitem_index_bool(self):
pytest.xfail("Test not implemented")
def test_setitem_index_datetime64(self):
pytest.xfail("Test not implemented")
def test_setitem_index_datetime64tz(self):
pytest.xfail("Test not implemented")
def test_setitem_index_timedelta64(self):
pytest.xfail("Test not implemented")
def test_setitem_index_period(self):
pytest.xfail("Test not implemented")
class TestInsertIndexCoercion(CoercionBase):
klasses = ["index"]
method = "insert"
def _assert_insert_conversion(self, original, value, expected, expected_dtype):
""" test coercion triggered by insert """
target = original.copy()
res = target.insert(1, value)
tm.assert_index_equal(res, expected)
assert res.dtype == expected_dtype
@pytest.mark.parametrize(
"insert, coerced_val, coerced_dtype",
[
(1, 1, object),
(1.1, 1.1, object),
(False, False, object),
("x", "x", object),
],
)
def test_insert_index_object(self, insert, coerced_val, coerced_dtype):
obj = pd.Index(list("abcd"))
assert obj.dtype == object
exp = pd.Index(["a", coerced_val, "b", "c", "d"])
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
@pytest.mark.parametrize(
"insert, coerced_val, coerced_dtype",
[
(1, 1, np.int64),
(1.1, 1.1, np.float64),
(False, 0, np.int64),
("x", "x", object),
],
)
def test_insert_index_int64(self, insert, coerced_val, coerced_dtype):
obj = pd.Int64Index([1, 2, 3, 4])
assert obj.dtype == np.int64
exp = pd.Index([1, coerced_val, 2, 3, 4])
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
@pytest.mark.parametrize(
"insert, coerced_val, coerced_dtype",
[
(1, 1.0, np.float64),
(1.1, 1.1, np.float64),
(False, 0.0, np.float64),
("x", "x", object),
],
)
def test_insert_index_float64(self, insert, coerced_val, coerced_dtype):
obj = pd.Float64Index([1.0, 2.0, 3.0, 4.0])
assert obj.dtype == np.float64
exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0])
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
@pytest.mark.parametrize(
"fill_val,exp_dtype",
[
(pd.Timestamp("2012-01-01"), "datetime64[ns]"),
(pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"),
],
ids=["datetime64", "datetime64tz"],
)
def test_insert_index_datetimes(self, fill_val, exp_dtype):
obj = pd.DatetimeIndex(
["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], tz=fill_val.tz
)
assert obj.dtype == exp_dtype
exp = pd.DatetimeIndex(
["2011-01-01", fill_val.date(), "2011-01-02", "2011-01-03", "2011-01-04"],
tz=fill_val.tz,
)
self._assert_insert_conversion(obj, fill_val, exp, exp_dtype)
if fill_val.tz:
msg = "Cannot compare tz-naive and tz-aware"
with pytest.raises(TypeError, match=msg):
obj.insert(1, pd.Timestamp("2012-01-01"))
msg = "Timezones don't match"
with pytest.raises(ValueError, match=msg):
obj.insert(1, pd.Timestamp("2012-01-01", tz="Asia/Tokyo"))
else:
msg = "Cannot compare tz-naive and tz-aware"
with pytest.raises(TypeError, match=msg):
obj.insert(1, pd.Timestamp("2012-01-01", tz="Asia/Tokyo"))
msg = "cannot insert DatetimeArray with incompatible label"
with pytest.raises(TypeError, match=msg):
obj.insert(1, 1)
pytest.xfail("ToDo: must coerce to object")
def test_insert_index_timedelta64(self):
obj = pd.TimedeltaIndex(["1 day", "2 day", "3 day", "4 day"])
assert obj.dtype == "timedelta64[ns]"
# timedelta64 + timedelta64 => timedelta64
exp = pd.TimedeltaIndex(["1 day", "10 day", "2 day", "3 day", "4 day"])
self._assert_insert_conversion(
obj, pd.Timedelta("10 day"), exp, "timedelta64[ns]"
)
# ToDo: must coerce to object
msg = "cannot insert TimedeltaArray with incompatible label"
with pytest.raises(TypeError, match=msg):
obj.insert(1, pd.Timestamp("2012-01-01"))
# ToDo: must coerce to object
msg = "cannot insert TimedeltaArray with incompatible label"
with pytest.raises(TypeError, match=msg):
obj.insert(1, 1)
@pytest.mark.parametrize(
"insert, coerced_val, coerced_dtype",
[
(pd.Period("2012-01", freq="M"), "2012-01", "period[M]"),
(pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01"), object),
(1, 1, object),
("x", "x", object),
],
)
def test_insert_index_period(self, insert, coerced_val, coerced_dtype):
obj = pd.PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"], freq="M")
assert obj.dtype == "period[M]"
data = [
pd.Period("2011-01", freq="M"),
coerced_val,
pd.Period("2011-02", freq="M"),
pd.Period("2011-03", freq="M"),
pd.Period("2011-04", freq="M"),
]
if isinstance(insert, pd.Period):
exp = pd.PeriodIndex(data, freq="M")
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
else:
msg = r"Unexpected keyword arguments {'freq'}"
with pytest.raises(TypeError, match=msg):
pd.Index(data, freq="M")
def test_insert_index_complex128(self):
pytest.xfail("Test not implemented")
def test_insert_index_bool(self):
pytest.xfail("Test not implemented")
class TestWhereCoercion(CoercionBase):
method = "where"
def _assert_where_conversion(
self, original, cond, values, expected, expected_dtype
):
""" test coercion triggered by where """
target = original.copy()
res = target.where(cond, values)
self._assert(res, expected, expected_dtype)
@pytest.mark.parametrize(
"fill_val,exp_dtype",
[(1, object), (1.1, object), (1 + 1j, object), (True, object)],
)
def test_where_object(self, index_or_series, fill_val, exp_dtype):
klass = index_or_series
obj = klass(list("abcd"))
assert obj.dtype == object
cond = klass([True, False, True, False])
if fill_val is True and klass is pd.Series:
ret_val = 1
else:
ret_val = fill_val
exp = klass(["a", ret_val, "c", ret_val])
self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
if fill_val is True:
values = klass([True, False, True, True])
else:
values = klass(fill_val * x for x in [5, 6, 7, 8])
exp = klass(["a", values[1], "c", values[3]])
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
@pytest.mark.parametrize(
"fill_val,exp_dtype",
[(1, np.int64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)],
)
def test_where_int64(self, index_or_series, fill_val, exp_dtype):
klass = index_or_series
if klass is pd.Index and exp_dtype is np.complex128:
pytest.skip("Complex Index not supported")
obj = klass([1, 2, 3, 4])
assert obj.dtype == np.int64
cond = klass([True, False, True, False])
exp = klass([1, fill_val, 3, fill_val])
self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
if fill_val is True:
values = klass([True, False, True, True])
else:
values = klass(x * fill_val for x in [5, 6, 7, 8])
exp = klass([1, values[1], 3, values[3]])
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
@pytest.mark.parametrize(
"fill_val, exp_dtype",
[(1, np.float64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)],
)
def test_where_float64(self, index_or_series, fill_val, exp_dtype):
klass = index_or_series
if klass is pd.Index and exp_dtype is np.complex128:
pytest.skip("Complex Index not supported")
obj = klass([1.1, 2.2, 3.3, 4.4])
assert obj.dtype == np.float64
cond = klass([True, False, True, False])
exp = klass([1.1, fill_val, 3.3, fill_val])
self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
if fill_val is True:
values = klass([True, False, True, True])
else:
values = klass(x * fill_val for x in [5, 6, 7, 8])
exp = klass([1.1, values[1], 3.3, values[3]])
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
@pytest.mark.parametrize(
"fill_val,exp_dtype",
[
(1, np.complex128),
(1.1, np.complex128),
(1 + 1j, np.complex128),
(True, object),
],
)
def test_where_series_complex128(self, fill_val, exp_dtype):
obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j])
assert obj.dtype == np.complex128
cond = pd.Series([True, False, True, False])
exp = pd.Series([1 + 1j, fill_val, 3 + 3j, fill_val])
self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
if fill_val is True:
values = pd.Series([True, False, True, True])
else:
values = pd.Series(x * fill_val for x in [5, 6, 7, 8])
exp = pd.Series([1 + 1j, values[1], 3 + 3j, values[3]])
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
@pytest.mark.parametrize(
"fill_val,exp_dtype",
[(1, object), (1.1, object), (1 + 1j, object), (True, np.bool_)],
)
def test_where_series_bool(self, fill_val, exp_dtype):
obj = pd.Series([True, False, True, False])
assert obj.dtype == np.bool_
cond = pd.Series([True, False, True, False])
exp = pd.Series([True, fill_val, True, fill_val])
self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
if fill_val is True:
values = pd.Series([True, False, True, True])
else:
values = pd.Series(x * fill_val for x in [5, 6, 7, 8])
exp = pd.Series([True, values[1], True, values[3]])
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
@pytest.mark.parametrize(
"fill_val,exp_dtype",
[
(pd.Timestamp("2012-01-01"), "datetime64[ns]"),
(pd.Timestamp("2012-01-01", tz="US/Eastern"), object),
],
ids=["datetime64", "datetime64tz"],
)
def test_where_series_datetime64(self, fill_val, exp_dtype):
obj = pd.Series(
[
pd.Timestamp("2011-01-01"),
pd.Timestamp("2011-01-02"),
pd.Timestamp("2011-01-03"),
pd.Timestamp("2011-01-04"),
]
)
assert obj.dtype == "datetime64[ns]"
cond = pd.Series([True, False, True, False])
exp = pd.Series(
[pd.Timestamp("2011-01-01"), fill_val, pd.Timestamp("2011-01-03"), fill_val]
)
self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
values = pd.Series(pd.date_range(fill_val, periods=4))
if fill_val.tz:
exp = pd.Series(
[
pd.Timestamp("2011-01-01"),
pd.Timestamp("2012-01-02 00:00", tz="US/Eastern"),
pd.Timestamp("2011-01-03"),
pd.Timestamp("2012-01-04 00:00", tz="US/Eastern"),
]
)
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
exp = pd.Series(
[
pd.Timestamp("2011-01-01"),
values[1],
pd.Timestamp("2011-01-03"),
values[3],
]
)
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
@pytest.mark.parametrize(
"fill_val",
[
pd.Timestamp("2012-01-01"),
pd.Timestamp("2012-01-01").to_datetime64(),
pd.Timestamp("2012-01-01").to_pydatetime(),
],
)
def test_where_index_datetime(self, fill_val):
exp_dtype = "datetime64[ns]"
obj = pd.Index(
[
pd.Timestamp("2011-01-01"),
pd.Timestamp("2011-01-02"),
pd.Timestamp("2011-01-03"),
pd.Timestamp("2011-01-04"),
]
)
assert obj.dtype == "datetime64[ns]"
cond = pd.Index([True, False, True, False])
result = obj.where(cond, fill_val)
expected = pd.DatetimeIndex([obj[0], fill_val, obj[2], fill_val])
tm.assert_index_equal(result, expected)
values = pd.Index(pd.date_range(fill_val, periods=4))
exp = pd.Index(
[
pd.Timestamp("2011-01-01"),
pd.Timestamp("2012-01-02"),
pd.Timestamp("2011-01-03"),
pd.Timestamp("2012-01-04"),
]
)
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
@pytest.mark.xfail(reason="GH 22839: do not ignore timezone, must be object")
def test_where_index_datetime64tz(self):
fill_val = pd.Timestamp("2012-01-01", tz="US/Eastern")
exp_dtype = object
obj = pd.Index(
[
pd.Timestamp("2011-01-01"),
pd.Timestamp("2011-01-02"),
pd.Timestamp("2011-01-03"),
pd.Timestamp("2011-01-04"),
]
)
assert obj.dtype == "datetime64[ns]"
cond = pd.Index([True, False, True, False])
msg = "Index\\(\\.\\.\\.\\) must be called with a collection of some kind"
with pytest.raises(TypeError, match=msg):
obj.where(cond, fill_val)
values = pd.Index(pd.date_range(fill_val, periods=4))
exp = pd.Index(
[
pd.Timestamp("2011-01-01"),
pd.Timestamp("2012-01-02", tz="US/Eastern"),
pd.Timestamp("2011-01-03"),
pd.Timestamp("2012-01-04", tz="US/Eastern"),
],
dtype=exp_dtype,
)
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
def test_where_index_complex128(self):
pytest.xfail("Test not implemented")
def test_where_index_bool(self):
pytest.xfail("Test not implemented")
def test_where_series_timedelta64(self):
pytest.xfail("Test not implemented")
def test_where_series_period(self):
pytest.xfail("Test not implemented")
@pytest.mark.parametrize(
"value", [pd.Timedelta(days=9), timedelta(days=9), np.timedelta64(9, "D")]
)
def test_where_index_timedelta64(self, value):
tdi = pd.timedelta_range("1 Day", periods=4)
cond = np.array([True, False, False, True])
expected = pd.TimedeltaIndex(["1 Day", value, value, "4 Days"])
result = tdi.where(cond, value)
tm.assert_index_equal(result, expected)
msg = "Where requires matching dtype"
with pytest.raises(TypeError, match=msg):
# wrong-dtyped NaT
tdi.where(cond, np.datetime64("NaT", "ns"))
def test_where_index_period(self):
dti = pd.date_range("2016-01-01", periods=3, freq="QS")
pi = dti.to_period("Q")
cond = np.array([False, True, False])
# Passinga valid scalar
value = pi[-1] + pi.freq * 10
expected = pd.PeriodIndex([value, pi[1], value])
result = pi.where(cond, value)
tm.assert_index_equal(result, expected)
# Case passing ndarray[object] of Periods
other = np.asarray(pi + pi.freq * 10, dtype=object)
result = pi.where(cond, other)
expected = pd.PeriodIndex([other[0], pi[1], other[2]])
tm.assert_index_equal(result, expected)
# Passing a mismatched scalar
msg = "Where requires matching dtype"
with pytest.raises(TypeError, match=msg):
pi.where(cond, pd.Timedelta(days=4))
with pytest.raises(TypeError, match=msg):
pi.where(cond, pd.Period("2020-04-21", "D"))
class TestFillnaSeriesCoercion(CoercionBase):
# not indexing, but place here for consistency
method = "fillna"
def test_has_comprehensive_tests(self):
pytest.xfail("Test not implemented")
def _assert_fillna_conversion(self, original, value, expected, expected_dtype):
""" test coercion triggered by fillna """
target = original.copy()
res = target.fillna(value)
self._assert(res, expected, expected_dtype)
@pytest.mark.parametrize(
"fill_val, fill_dtype",
[(1, object), (1.1, object), (1 + 1j, object), (True, object)],
)
def test_fillna_object(self, index_or_series, fill_val, fill_dtype):
klass = index_or_series
obj = klass(["a", np.nan, "c", "d"])
assert obj.dtype == object
exp = klass(["a", fill_val, "c", "d"])
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
@pytest.mark.parametrize(
"fill_val,fill_dtype",
[(1, np.float64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)],
)
def test_fillna_float64(self, index_or_series, fill_val, fill_dtype):
klass = index_or_series
obj = klass([1.1, np.nan, 3.3, 4.4])
assert obj.dtype == np.float64
exp = klass([1.1, fill_val, 3.3, 4.4])
# float + complex -> we don't support a complex Index
# complex for Series,
# object for Index
if fill_dtype == np.complex128 and klass == pd.Index:
fill_dtype = object
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
@pytest.mark.parametrize(
"fill_val,fill_dtype",
[
(1, np.complex128),
(1.1, np.complex128),
(1 + 1j, np.complex128),
(True, object),
],
)
def test_fillna_series_complex128(self, fill_val, fill_dtype):
obj = pd.Series([1 + 1j, np.nan, 3 + 3j, 4 + 4j])
assert obj.dtype == np.complex128
exp = pd.Series([1 + 1j, fill_val, 3 + 3j, 4 + 4j])
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
@pytest.mark.parametrize(
"fill_val,fill_dtype",
[
(pd.Timestamp("2012-01-01"), "datetime64[ns]"),
(pd.Timestamp("2012-01-01", tz="US/Eastern"), object),
(1, object),
("x", object),
],
ids=["datetime64", "datetime64tz", "object", "object"],
)
def test_fillna_datetime(self, index_or_series, fill_val, fill_dtype):
klass = index_or_series
obj = klass(
[
pd.Timestamp("2011-01-01"),
pd.NaT,
pd.Timestamp("2011-01-03"),
pd.Timestamp("2011-01-04"),
]
)
assert obj.dtype == "datetime64[ns]"
exp = klass(
[
pd.Timestamp("2011-01-01"),
fill_val,
pd.Timestamp("2011-01-03"),
pd.Timestamp("2011-01-04"),
]
)
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
@pytest.mark.parametrize(
"fill_val,fill_dtype",
[
(pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"),
(pd.Timestamp("2012-01-01"), object),
(pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), object),
(1, object),
("x", object),
],
)
def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype):
klass = index_or_series
tz = "US/Eastern"
obj = klass(
[
pd.Timestamp("2011-01-01", tz=tz),
pd.NaT,
pd.Timestamp("2011-01-03", tz=tz),
pd.Timestamp("2011-01-04", tz=tz),
]
)
assert obj.dtype == "datetime64[ns, US/Eastern]"
exp = klass(
[
pd.Timestamp("2011-01-01", tz=tz),
fill_val,
pd.Timestamp("2011-01-03", tz=tz),
pd.Timestamp("2011-01-04", tz=tz),
]
)
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
def test_fillna_series_int64(self):
pytest.xfail("Test not implemented")
def test_fillna_index_int64(self):
pytest.xfail("Test not implemented")
def test_fillna_series_bool(self):
pytest.xfail("Test not implemented")
def test_fillna_index_bool(self):
pytest.xfail("Test not implemented")
def test_fillna_series_timedelta64(self):
pytest.xfail("Test not implemented")
def test_fillna_series_period(self):
pytest.xfail("Test not implemented")
def test_fillna_index_timedelta64(self):
pytest.xfail("Test not implemented")
def test_fillna_index_period(self):
pytest.xfail("Test not implemented")
class TestReplaceSeriesCoercion(CoercionBase):
klasses = ["series"]
method = "replace"
rep: Dict[str, List] = {}
rep["object"] = ["a", "b"]
rep["int64"] = [4, 5]
rep["float64"] = [1.1, 2.2]
rep["complex128"] = [1 + 1j, 2 + 2j]
rep["bool"] = [True, False]
rep["datetime64[ns]"] = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-03")]
for tz in ["UTC", "US/Eastern"]:
# to test tz => different tz replacement
key = f"datetime64[ns, {tz}]"
rep[key] = [
pd.Timestamp("2011-01-01", tz=tz),
pd.Timestamp("2011-01-03", tz=tz),
]
rep["timedelta64[ns]"] = [pd.Timedelta("1 day"), pd.Timedelta("2 day")]
@pytest.mark.parametrize("how", ["dict", "series"])
@pytest.mark.parametrize(
"to_key",
[
"object",
"int64",
"float64",
"complex128",
"bool",
"datetime64[ns]",
"datetime64[ns, UTC]",
"datetime64[ns, US/Eastern]",
"timedelta64[ns]",
],
ids=[
"object",
"int64",
"float64",
"complex128",
"bool",
"datetime64",
"datetime64tz",
"datetime64tz",
"timedelta64",
],
)
@pytest.mark.parametrize(
"from_key",
[
"object",
"int64",
"float64",
"complex128",
"bool",
"datetime64[ns]",
"datetime64[ns, UTC]",
"datetime64[ns, US/Eastern]",
"timedelta64[ns]",
],
)
def test_replace_series(self, how, to_key, from_key):
index = pd.Index([3, 4], name="xxx")
obj = pd.Series(self.rep[from_key], index=index, name="yyy")
assert obj.dtype == from_key
if from_key.startswith("datetime") and to_key.startswith("datetime"):
# tested below
return
elif from_key in ["datetime64[ns, US/Eastern]", "datetime64[ns, UTC]"]:
# tested below
return
if how == "dict":
replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
elif how == "series":
replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
else:
raise ValueError
result = obj.replace(replacer)
if (from_key == "float64" and to_key in ("int64")) or (
from_key == "complex128" and to_key in ("int64", "float64")
):
if compat.is_platform_32bit() or compat.is_platform_windows():
pytest.skip(f"32-bit platform buggy: {from_key} -> {to_key}")
# Expected: do not downcast by replacement
exp = pd.Series(self.rep[to_key], index=index, name="yyy", dtype=from_key)
else:
exp = pd.Series(self.rep[to_key], index=index, name="yyy")
assert exp.dtype == to_key
tm.assert_series_equal(result, exp)
@pytest.mark.parametrize("how", ["dict", "series"])
@pytest.mark.parametrize(
"to_key",
["timedelta64[ns]", "bool", "object", "complex128", "float64", "int64"],
)
@pytest.mark.parametrize(
"from_key", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"]
)
def test_replace_series_datetime_tz(self, how, to_key, from_key):
index = pd.Index([3, 4], name="xyz")
obj = pd.Series(self.rep[from_key], index=index, name="yyy")
assert obj.dtype == from_key
if how == "dict":
replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
elif how == "series":
replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
else:
raise ValueError
result = obj.replace(replacer)
exp = pd.Series(self.rep[to_key], index=index, name="yyy")
assert exp.dtype == to_key
tm.assert_series_equal(result, exp)
@pytest.mark.parametrize("how", ["dict", "series"])
@pytest.mark.parametrize(
"to_key",
["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"],
)
@pytest.mark.parametrize(
"from_key",
["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"],
)
def test_replace_series_datetime_datetime(self, how, to_key, from_key):
index = pd.Index([3, 4], name="xyz")
obj = pd.Series(self.rep[from_key], index=index, name="yyy")
assert obj.dtype == from_key
if how == "dict":
replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
elif how == "series":
replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
else:
raise ValueError
result = obj.replace(replacer)
exp = pd.Series(self.rep[to_key], index=index, name="yyy")
assert exp.dtype == to_key
tm.assert_series_equal(result, exp)
def test_replace_series_period(self):
pytest.xfail("Test not implemented")