mirror of
https://github.com/PiBrewing/craftbeerpi4.git
synced 2024-11-27 01:08:12 +01:00
705 lines
24 KiB
Python
705 lines
24 KiB
Python
|
"""
|
||
|
Test setting *parts* of objects, both positionally and by label.
|
||
|
|
||
|
TODO: these should be split among the indexer tests
|
||
|
"""
|
||
|
|
||
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
import pandas as pd
|
||
|
from pandas import DataFrame, Index, Period, Series, Timestamp, date_range, period_range
|
||
|
import pandas._testing as tm
|
||
|
|
||
|
|
||
|
class TestPartialSetting:
|
||
|
def test_partial_setting(self):
    """Check enlargement via ``[]``/``.loc``/``.at`` and refusal via ``.iloc``/``.iat``."""
    # GH2578, allow ix and friends to partially set

    # ## series ##
    s_orig = Series([1, 2, 3])

    # Assigning to the missing label 5 appends it.  The expected Series is
    # built from the original values plus the new one, so the 5.0 case is
    # compared against a float result.
    for new_value in (5, 5.0):
        expected = Series([1, 2, 3, new_value], index=[0, 1, 2, 5])

        s = s_orig.copy()
        s[5] = new_value
        tm.assert_series_equal(s, expected)

        s = s_orig.copy()
        s.loc[5] = new_value
        tm.assert_series_equal(s, expected)

    # iloc/iat raise
    s = s_orig.copy()

    msg = "iloc cannot enlarge its target object"
    with pytest.raises(IndexError, match=msg):
        s.iloc[3] = 5.0

    msg = "index 3 is out of bounds for axis 0 with size 3"
    with pytest.raises(IndexError, match=msg):
        s.iat[3] = 5.0

    # ## frame ##
    df_orig = DataFrame(
        np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64"
    )

    # iloc/iat raise
    df = df_orig.copy()

    msg = "iloc cannot enlarge its target object"
    with pytest.raises(IndexError, match=msg):
        df.iloc[4, 2] = 5.0

    msg = "index 2 is out of bounds for axis 0 with size 2"
    with pytest.raises(IndexError, match=msg):
        df.iat[4, 2] = 5.0

    # row setting where it exists
    expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]})
    df = df_orig.copy()
    df.iloc[1] = df.iloc[2]
    tm.assert_frame_equal(df, expected)

    expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]})
    df = df_orig.copy()
    df.loc[1] = df.loc[2]
    tm.assert_frame_equal(df, expected)

    # like 2578, partial setting with dtype preservation
    expected = DataFrame({"A": [0, 2, 4, 4], "B": [1, 3, 5, 5]})
    df = df_orig.copy()
    df.loc[3] = df.loc[2]
    tm.assert_frame_equal(df, expected)

    # single dtype frame, overwrite
    expected = DataFrame({"A": [0, 2, 4], "B": [0, 2, 4]})
    df = df_orig.copy()
    df.loc[:, "B"] = df.loc[:, "A"]
    tm.assert_frame_equal(df, expected)

    # mixed dtype frame, overwrite
    expected = DataFrame({"A": [0, 2, 4], "B": Series([0, 2, 4])})
    df = df_orig.copy()
    df["B"] = df["B"].astype(np.float64)
    df.loc[:, "B"] = df.loc[:, "A"]
    tm.assert_frame_equal(df, expected)

    # single dtype frame, partial setting
    # The expected column is taken from df_orig rather than from whatever
    # frame the previous scenario left behind; column "A" is never modified
    # in this test, so the values are the same.
    expected = df_orig.copy()
    expected["C"] = df_orig["A"]
    df = df_orig.copy()
    df.loc[:, "C"] = df.loc[:, "A"]
    tm.assert_frame_equal(df, expected)

    # mixed frame, partial setting
    # NOTE(review): this scenario repeats the previous one verbatim and never
    # builds a mixed-dtype frame -- confirm whether a float column was meant.
    expected = df_orig.copy()
    expected["C"] = df_orig["A"]
    df = df_orig.copy()
    df.loc[:, "C"] = df.loc[:, "A"]
    tm.assert_frame_equal(df, expected)

    # GH 8473
    dates = date_range("1/1/2000", periods=8)
    df_orig = DataFrame(
        np.random.randn(8, 4), index=dates, columns=["A", "B", "C", "D"]
    )
    # Label / one-element index one frequency step past the last date.
    next_label = dates[-1] + dates.freq
    next_index = dates[-1:] + dates.freq

    # new row, existing column
    expected = pd.concat(
        [df_orig, DataFrame({"A": 7}, index=next_index)], sort=True
    )
    df = df_orig.copy()
    df.loc[next_label, "A"] = 7
    tm.assert_frame_equal(df, expected)
    df = df_orig.copy()
    df.at[next_label, "A"] = 7
    tm.assert_frame_equal(df, expected)

    # new row and new column (label 0) at the same time
    exp_other = DataFrame({0: 7}, index=next_index)
    expected = pd.concat([df_orig, exp_other], axis=1)

    df = df_orig.copy()
    df.loc[next_label, 0] = 7
    tm.assert_frame_equal(df, expected)
    df = df_orig.copy()
    df.at[next_label, 0] = 7
    tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
def test_partial_setting_mixed_dtype(self):
    """Check row enlargement with ``.loc`` on mixed-dtype and empty frames."""
    # in a mixed dtype environment, try to preserve dtypes
    # by appending
    df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"])

    s = df.loc[1].copy()
    s.name = 2
    # DataFrame.append is deprecated (and removed in pandas 2.0).  Build the
    # one-row frame it used to build internally and concatenate explicitly;
    # infer_objects() restores per-column dtypes from the object-dtype row.
    expected = pd.concat([df, s.to_frame().T.infer_objects()])

    df.loc[2] = df.loc[1]
    tm.assert_frame_equal(df, expected)

    # columns will align
    df = DataFrame(columns=["A", "B"])
    df.loc[0] = Series(1, index=range(4))
    tm.assert_frame_equal(df, DataFrame(columns=["A", "B"], index=[0]))

    # columns will align
    df = DataFrame(columns=["A", "B"])
    df.loc[0] = Series(1, index=["B"])

    exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64")
    tm.assert_frame_equal(df, exp)

    # list-like must conform
    df = DataFrame(columns=["A", "B"])

    msg = "cannot set a row with mismatched columns"
    with pytest.raises(ValueError, match=msg):
        df.loc[0] = [1, 2, 3]

    # TODO: #15657, these are left as object and not coerced
    df = DataFrame(columns=["A", "B"])
    df.loc[3] = [6, 7]

    exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype="object")
    tm.assert_frame_equal(df, exp)
|
||
|
|
||
|
def test_series_partial_set(self):
    """``Series.loc`` with list-likes: missing labels raise, ``reindex`` fills NaN."""
    # partial set with new index
    # Regression from GH4825
    missing_msg = "with any missing labels"

    def check_missing(obj, labels, values):
        # .loc refuses a list-like that contains any missing label ...
        with pytest.raises(KeyError, match=missing_msg):
            obj.loc[labels]
        # ... while .reindex accepts the same labels and must give `values`.
        result = obj.reindex(labels)
        expected = Series(values, index=labels)
        tm.assert_series_equal(result, expected, check_index_type=True)

    ser = Series([0.1, 0.2], index=[1, 2])

    # loc equiv to .reindex
    check_missing(ser, [3, 2, 3], [np.nan, 0.2, np.nan])
    check_missing(ser, [3, 2, 3, "x"], [np.nan, 0.2, np.nan, np.nan])

    # every requested label exists: plain selection, repeats allowed
    expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1])
    result = ser.loc[[2, 2, 1]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    check_missing(ser, [2, 2, "x", 1], [0.2, 0.2, np.nan, 0.1])

    # raises as nothing is in the index
    msg = (
        r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64'\)\] are "
        r"in the \[index\]\""
    )
    with pytest.raises(KeyError, match=msg):
        ser.loc[[3, 3, 3]]

    check_missing(ser, [2, 2, 3], [0.2, 0.2, np.nan])

    s = Series([0.1, 0.2, 0.3], index=[1, 2, 3])
    check_missing(s, [3, 4, 4], [0.3, np.nan, np.nan])

    s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
    check_missing(s, [5, 3, 3], [np.nan, 0.3, 0.3])

    s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
    check_missing(s, [5, 4, 4], [np.nan, 0.4, 0.4])

    s = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7])
    check_missing(s, [7, 2, 2], [0.4, np.nan, np.nan])

    s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4])
    check_missing(s, [4, 5, 5], [0.4, np.nan, np.nan])

    # iloc
    expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1])
    result = ser.iloc[[1, 1, 0, 0]]
    tm.assert_series_equal(result, expected, check_index_type=True)
|
||
|
|
||
|
def test_series_partial_set_with_name(self):
    """Same list-like ``.loc`` checks as above, on a named Series with a named index."""
    # GH 11497
    missing = "with any missing labels"

    named_index = Index([1, 2], dtype="int64", name="idx")
    ser = Series([0.1, 0.2], index=named_index, name="s")

    # loc
    for labels in ([3, 2, 3], [3, 2, 3, "x"]):
        with pytest.raises(KeyError, match=missing):
            ser.loc[labels]

    # all labels present: both the index name and the Series name survive
    wanted_index = Index([2, 2, 1], dtype="int64", name="idx")
    wanted = Series([0.2, 0.2, 0.1], index=wanted_index, name="s")
    picked = ser.loc[[2, 2, 1]]
    tm.assert_series_equal(picked, wanted, check_index_type=True)

    with pytest.raises(KeyError, match=missing):
        ser.loc[[2, 2, "x", 1]]

    # raises as nothing is in the index
    none_found = (
        r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64', "
        r"name='idx'\)\] are in the \[index\]\""
    )
    with pytest.raises(KeyError, match=none_found):
        ser.loc[[3, 3, 3]]

    with pytest.raises(KeyError, match=missing):
        ser.loc[[2, 2, 3]]

    # (index values, requested labels) -- each request has a missing label
    all_values = [0.1, 0.2, 0.3, 0.4]
    cases = [
        ([1, 2, 3], [3, 4, 4]),
        ([1, 2, 3, 4], [5, 3, 3]),
        ([1, 2, 3, 4], [5, 4, 4]),
        ([4, 5, 6, 7], [7, 2, 2]),
        ([1, 2, 3, 4], [4, 5, 5]),
    ]
    for index_values, labels in cases:
        case_index = Index(index_values, dtype="int64", name="idx")
        case_values = all_values[: len(index_values)]
        with pytest.raises(KeyError, match=missing):
            Series(case_values, index=case_index, name="s").loc[labels]

    # iloc
    wanted_index = Index([2, 2, 1, 1], dtype="int64", name="idx")
    wanted = Series([0.2, 0.2, 0.1, 0.1], index=wanted_index, name="s")
    picked = ser.iloc[[1, 1, 0, 0]]
    tm.assert_series_equal(picked, wanted, check_index_type=True)
|
||
|
|
||
|
def test_partial_set_invalid(self):
    """Numeric row labels are rejected on a datetime-indexed frame; a string is accepted."""
    # GH 4940
    # allow only setting of 'valid' values

    orig = tm.makeTimeDataFrame()
    df = orig.copy()

    # don't allow not string inserts
    msg = "cannot insert DatetimeArray with incompatible label"

    for bad_label in (100.0, 100):
        with pytest.raises(TypeError, match=msg):
            df.loc[bad_label, :] = df.iloc[0]

    # allow object conversion here
    df = orig.copy()
    df.loc["a", :] = df.iloc[0]
    # DataFrame.append is deprecated (and removed in pandas 2.0); build the
    # one-row frame it used to build internally and concatenate explicitly.
    new_row = Series(df.iloc[0], name="a").to_frame().T.infer_objects()
    exp = pd.concat([orig, new_row])
    tm.assert_frame_equal(df, exp)
    tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"]))
    assert df.index.dtype == "object"
|
||
|
|
||
|
def test_partial_set_empty_series(self):
|
||
|
|
||
|
# GH5226
|
||
|
|
||
|
# partially set with an empty object series
|
||
|
s = Series(dtype=object)
|
||
|
s.loc[1] = 1
|
||
|
tm.assert_series_equal(s, Series([1], index=[1]))
|
||
|
s.loc[3] = 3
|
||
|
tm.assert_series_equal(s, Series([1, 3], index=[1, 3]))
|
||
|
|
||
|
s = Series(dtype=object)
|
||
|
s.loc[1] = 1.0
|
||
|
tm.assert_series_equal(s, Series([1.0], index=[1]))
|
||
|
s.loc[3] = 3.0
|
||
|
tm.assert_series_equal(s, Series([1.0, 3.0], index=[1, 3]))
|
||
|
|
||
|
s = Series(dtype=object)
|
||
|
s.loc["foo"] = 1
|
||
|
tm.assert_series_equal(s, Series([1], index=["foo"]))
|
||
|
s.loc["bar"] = 3
|
||
|
tm.assert_series_equal(s, Series([1, 3], index=["foo", "bar"]))
|
||
|
s.loc[3] = 4
|
||
|
tm.assert_series_equal(s, Series([1, 3, 4], index=["foo", "bar", 3]))
|
||
|
|
||
|
def test_partial_set_empty_frame(self):
    """Check what can and cannot be assigned into frames with no rows and/or columns."""
    # partially set with an empty object
    # frame
    df = DataFrame()

    no_columns = "cannot set a frame with no defined columns"
    for row_value in (1, Series([1], index=["foo"])):
        with pytest.raises(ValueError, match=no_columns):
            df.loc[1] = row_value

    no_index = "cannot set a frame with no defined index and a scalar"
    with pytest.raises(ValueError, match=no_index):
        df.loc[:, 1] = 1

    # these work as they don't really change
    # anything but the index
    # GH5632
    expected = DataFrame(columns=["foo"], index=Index([], dtype="object"))

    built = DataFrame(index=Index([], dtype="object"))
    built["foo"] = Series([], dtype="object")
    tm.assert_frame_equal(built, expected)

    built = DataFrame()
    built["foo"] = Series(built.index)
    tm.assert_frame_equal(built, expected)

    built = DataFrame()
    built["foo"] = built.index
    tm.assert_frame_equal(built, expected)

    # empty int64 index; the new column is expected to be float64
    expected = DataFrame(columns=["foo"], index=Index([], dtype="int64"))
    expected["foo"] = expected["foo"].astype("float64")

    built = DataFrame(index=Index([], dtype="int64"))
    built["foo"] = []
    tm.assert_frame_equal(built, expected)

    built = DataFrame(index=Index([], dtype="int64"))
    built["foo"] = Series(np.arange(len(built)), dtype="float64")
    tm.assert_frame_equal(built, expected)

    built = DataFrame(index=Index([], dtype="int64"))
    built["foo"] = range(len(built))
    expected = DataFrame(columns=["foo"], index=Index([], dtype="int64"))
    expected["foo"] = expected["foo"].astype("float64")
    tm.assert_frame_equal(built, expected)

    # assigning a Series as a new column of a completely empty frame
    df = DataFrame()
    tm.assert_index_equal(df.columns, Index([], dtype=object))
    df2 = DataFrame()
    df2[1] = Series([1], index=["foo"])
    df.loc[:, 1] = Series([1], index=["foo"])
    tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1]))
    tm.assert_frame_equal(df, df2)

    # no index to start
    expected = DataFrame({0: Series(1, index=range(4))}, columns=["A", "B", 0])

    via_setitem = DataFrame(columns=["A", "B"])
    via_setitem[0] = Series(1, index=range(4))
    # The results are discarded; these two only have to run without raising.
    via_setitem.dtypes
    str(via_setitem)
    tm.assert_frame_equal(via_setitem, expected)

    via_loc = DataFrame(columns=["A", "B"])
    via_loc.loc[:, 0] = Series(1, index=range(4))
    via_loc.dtypes
    str(via_loc)
    tm.assert_frame_equal(via_loc, expected)
|
||
|
|
||
|
def test_partial_set_empty_frame_row(self):
|
||
|
# GH5720, GH5744
|
||
|
# don't create rows when empty
|
||
|
expected = DataFrame(columns=["A", "B", "New"], index=Index([], dtype="int64"))
|
||
|
expected["A"] = expected["A"].astype("int64")
|
||
|
expected["B"] = expected["B"].astype("float64")
|
||
|
expected["New"] = expected["New"].astype("float64")
|
||
|
|
||
|
df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
|
||
|
y = df[df.A > 5]
|
||
|
y["New"] = np.nan
|
||
|
tm.assert_frame_equal(y, expected)
|
||
|
# tm.assert_frame_equal(y,expected)
|
||
|
|
||
|
expected = DataFrame(columns=["a", "b", "c c", "d"])
|
||
|
expected["d"] = expected["d"].astype("int64")
|
||
|
df = DataFrame(columns=["a", "b", "c c"])
|
||
|
df["d"] = 3
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
tm.assert_series_equal(df["c c"], Series(name="c c", dtype=object))
|
||
|
|
||
|
# reindex columns is ok
|
||
|
df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
|
||
|
y = df[df.A > 5]
|
||
|
result = y.reindex(columns=["A", "B", "C"])
|
||
|
expected = DataFrame(columns=["A", "B", "C"], index=Index([], dtype="int64"))
|
||
|
expected["A"] = expected["A"].astype("int64")
|
||
|
expected["B"] = expected["B"].astype("float64")
|
||
|
expected["C"] = expected["C"].astype("float64")
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_partial_set_empty_frame_set_series(self):
|
||
|
# GH 5756
|
||
|
# setting with empty Series
|
||
|
df = DataFrame(Series(dtype=object))
|
||
|
tm.assert_frame_equal(df, DataFrame({0: Series(dtype=object)}))
|
||
|
|
||
|
df = DataFrame(Series(name="foo", dtype=object))
|
||
|
tm.assert_frame_equal(df, DataFrame({"foo": Series(dtype=object)}))
|
||
|
|
||
|
def test_partial_set_empty_frame_empty_copy_assignment(self):
|
||
|
# GH 5932
|
||
|
# copy on empty with assignment fails
|
||
|
df = DataFrame(index=[0])
|
||
|
df = df.copy()
|
||
|
df["a"] = 0
|
||
|
expected = DataFrame(0, index=[0], columns=["a"])
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
def test_partial_set_empty_frame_empty_consistencies(self):
|
||
|
# GH 6171
|
||
|
# consistency on empty frames
|
||
|
df = DataFrame(columns=["x", "y"])
|
||
|
df["x"] = [1, 2]
|
||
|
expected = DataFrame(dict(x=[1, 2], y=[np.nan, np.nan]))
|
||
|
tm.assert_frame_equal(df, expected, check_dtype=False)
|
||
|
|
||
|
df = DataFrame(columns=["x", "y"])
|
||
|
df["x"] = ["1", "2"]
|
||
|
expected = DataFrame(dict(x=["1", "2"], y=[np.nan, np.nan]), dtype=object)
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
df = DataFrame(columns=["x", "y"])
|
||
|
df.loc[0, "x"] = 1
|
||
|
expected = DataFrame(dict(x=[1], y=[np.nan]))
|
||
|
tm.assert_frame_equal(df, expected, check_dtype=False)
|
||
|
|
||
|
@pytest.mark.parametrize(
    "idx,labels,expected_idx",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-08", "2000-01-12"],
            [
                Period("2000-01-04", freq="D"),
                Period("2000-01-08", freq="D"),
                Period("2000-01-12", freq="D"),
            ],
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-08", "2000-01-12"],
            # No ``freq`` here: Timestamp's ``freq`` argument is deprecated
            # (removed in pandas 2.0) and is not needed for this comparison.
            [
                Timestamp("2000-01-04"),
                Timestamp("2000-01-08"),
                Timestamp("2000-01-12"),
            ],
        ),
        (
            pd.timedelta_range(start="1 day", periods=20),
            ["4D", "8D", "12D"],
            [pd.Timedelta("4 day"), pd.Timedelta("8 day"), pd.Timedelta("12 day")],
        ),
    ],
)
def test_loc_with_list_of_strings_representing_datetimes(
    self, idx, labels, expected_idx
):
    """Select from period/datetime/timedelta indexes with a list of strings.

    ``idx`` is the 20-element index under test, ``labels`` the strings passed
    to the indexer and ``expected_idx`` the index entries they should pick
    (positions 3, 7 and 11 of ``idx``).
    """
    # GH 11278
    s = Series(range(20), index=idx)
    df = DataFrame(range(20), index=idx)

    # The data is range(20), so the selected values equal the positions.
    expected_value = [3, 7, 11]
    expected_s = Series(expected_value, expected_idx)
    expected_df = DataFrame(expected_value, expected_idx)

    tm.assert_series_equal(expected_s, s.loc[labels])
    tm.assert_series_equal(expected_s, s[labels])
    tm.assert_frame_equal(expected_df, df.loc[labels])
|
||
|
|
||
|
@pytest.mark.parametrize(
    "idx,labels",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-30"],
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-30"],
        ),
        (pd.timedelta_range(start="1 day", periods=20), ["3 day", "30 day"]),
    ],
)
def test_loc_with_list_of_strings_representing_datetimes_missing_value(
    self, idx, labels
):
    """A list of strings whose second entry lies outside ``idx`` raises KeyError."""
    # GH 11278
    ser = Series(range(20), index=idx)
    frame = DataFrame(range(20), index=idx)
    msg = r"with any missing labels"

    # The same failure is required from all three access paths.
    lookups = (
        lambda: ser.loc[labels],
        lambda: ser[labels],
        lambda: frame.loc[labels],
    )
    for lookup in lookups:
        with pytest.raises(KeyError, match=msg):
            lookup()
|
||
|
|
||
|
@pytest.mark.parametrize(
    "idx,labels,msg",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["4D", "8D"],
            (
                r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
                r"are in the \[index\]"
            ),
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["4D", "8D"],
            (
                r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
                r"are in the \[index\]"
            ),
        ),
        (
            pd.timedelta_range(start="1 day", periods=20),
            ["2000-01-04", "2000-01-08"],
            (
                r"None of \[Index\(\['2000-01-04', '2000-01-08'\], "
                r"dtype='object'\)\] are in the \[index\]"
            ),
        ),
    ],
)
def test_loc_with_list_of_strings_representing_datetimes_not_matched_type(
    self, idx, labels, msg
):
    """Strings of the wrong kind for ``idx`` raise KeyError matching ``msg``.

    Timedelta-style strings are used against period/datetime indexes and
    date strings against a timedelta index.
    """
    # GH 11278
    ser = Series(range(20), index=idx)
    frame = DataFrame(range(20), index=idx)

    # Each of these is indexed with [labels] and must raise the same error.
    for indexable in (ser.loc, ser, frame.loc):
        with pytest.raises(KeyError, match=msg):
            indexable[labels]
|
||
|
|
||
|
def test_indexing_timeseries_regression(self):
|
||
|
# Issue 34860
|
||
|
arr = date_range("1/1/2008", "1/1/2009")
|
||
|
result = arr.to_series()["2008"]
|
||
|
|
||
|
rng = date_range(start="2008-01-01", end="2008-12-31")
|
||
|
expected = Series(rng, index=rng)
|
||
|
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
def test_index_name_empty(self):
|
||
|
# GH 31368
|
||
|
df = pd.DataFrame({}, index=pd.RangeIndex(0, name="df_index"))
|
||
|
series = pd.Series(1.23, index=pd.RangeIndex(4, name="series_index"))
|
||
|
|
||
|
df["series"] = series
|
||
|
expected = pd.DataFrame(
|
||
|
{"series": [1.23] * 4}, index=pd.RangeIndex(4, name="df_index")
|
||
|
)
|
||
|
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
# GH 36527
|
||
|
df = pd.DataFrame()
|
||
|
series = pd.Series(1.23, index=pd.RangeIndex(4, name="series_index"))
|
||
|
df["series"] = series
|
||
|
expected = pd.DataFrame(
|
||
|
{"series": [1.23] * 4}, index=pd.RangeIndex(4, name="series_index")
|
||
|
)
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
def test_slice_irregular_datetime_index_with_nan(self):
|
||
|
# GH36953
|
||
|
index = pd.to_datetime(["2012-01-01", "2012-01-02", "2012-01-03", None])
|
||
|
df = DataFrame(range(len(index)), index=index)
|
||
|
expected = DataFrame(range(len(index[:3])), index=index[:3])
|
||
|
result = df["2012-01-01":"2012-01-04"]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_slice_datetime_index(self):
|
||
|
# GH35509
|
||
|
df = DataFrame(
|
||
|
{"col1": ["a", "b", "c"], "col2": [1, 2, 3]},
|
||
|
index=pd.to_datetime(["2020-08-01", "2020-07-02", "2020-08-05"]),
|
||
|
)
|
||
|
expected = DataFrame(
|
||
|
{"col1": ["a", "c"], "col2": [1, 3]},
|
||
|
index=pd.to_datetime(["2020-08-01", "2020-08-05"]),
|
||
|
)
|
||
|
result = df.loc["2020-08"]
|
||
|
tm.assert_frame_equal(result, expected)
|