""" test label based indexing with loc """ from datetime import datetime, time, timedelta from io import StringIO import re from dateutil.tz import gettz import numpy as np import pytest from pandas.compat.numpy import is_numpy_dev import pandas.util._test_decorators as td import pandas as pd from pandas import ( Categorical, CategoricalIndex, DataFrame, Index, MultiIndex, Series, SparseDtype, Timedelta, Timestamp, date_range, timedelta_range, to_datetime, to_timedelta, ) import pandas._testing as tm from pandas.api.types import is_scalar from pandas.tests.indexing.common import Base class TestLoc(Base): def test_loc_getitem_int(self): # int label self.check_result("loc", 2, typs=["labels"], fails=KeyError) def test_loc_getitem_label(self): # label self.check_result("loc", "c", typs=["empty"], fails=KeyError) def test_loc_getitem_label_out_of_range(self): # out of range label self.check_result( "loc", "f", typs=["ints", "uints", "labels", "mixed", "ts"], fails=KeyError ) self.check_result("loc", "f", typs=["floats"], fails=KeyError) self.check_result("loc", "f", typs=["floats"], fails=KeyError) self.check_result("loc", 20, typs=["ints", "uints", "mixed"], fails=KeyError) self.check_result("loc", 20, typs=["labels"], fails=KeyError) self.check_result("loc", 20, typs=["ts"], axes=0, fails=KeyError) self.check_result("loc", 20, typs=["floats"], axes=0, fails=KeyError) def test_loc_getitem_label_list(self): # TODO: test something here? # list of labels pass def test_loc_getitem_label_list_with_missing(self): self.check_result("loc", [0, 1, 2], typs=["empty"], fails=KeyError) self.check_result( "loc", [0, 2, 10], typs=["ints", "uints", "floats"], axes=0, fails=KeyError ) self.check_result( "loc", [3, 6, 7], typs=["ints", "uints", "floats"], axes=1, fails=KeyError ) # GH 17758 - MultiIndex and missing keys self.check_result( "loc", [(1, 3), (1, 4), (2, 5)], typs=["multi"], axes=0, fails=KeyError ) def test_loc_getitem_label_list_fails(self): # fails self.check_result( "loc", [20, 30, 40], typs=["ints", "uints"], axes=1, fails=KeyError ) def test_loc_getitem_label_array_like(self): # TODO: test something? # array like pass def test_loc_getitem_bool(self): # boolean indexers b = [True, False, True, False] self.check_result("loc", b, typs=["empty"], fails=IndexError) def test_loc_getitem_label_slice(self): # label slices (with ints) # real label slices # GH 14316 self.check_result( "loc", slice(1, 3), typs=["labels", "mixed", "empty", "ts", "floats"], fails=TypeError, ) self.check_result( "loc", slice("20130102", "20130104"), typs=["ts"], axes=1, fails=TypeError ) self.check_result("loc", slice(2, 8), typs=["mixed"], axes=0, fails=TypeError) self.check_result("loc", slice(2, 8), typs=["mixed"], axes=1, fails=KeyError) self.check_result( "loc", slice(2, 4, 2), typs=["mixed"], axes=0, fails=TypeError ) def test_setitem_from_duplicate_axis(self): # GH#34034 df = DataFrame( [[20, "a"], [200, "a"], [200, "a"]], columns=["col1", "col2"], index=[10, 1, 1], ) df.loc[1, "col1"] = np.arange(2) expected = DataFrame( [[20, "a"], [0, "a"], [1, "a"]], columns=["col1", "col2"], index=[10, 1, 1] ) tm.assert_frame_equal(df, expected) class TestLoc2: # TODO: better name, just separating out things that rely on base class def test_loc_getitem_missing_unicode_key(self): df = DataFrame({"a": [1]}) with pytest.raises(KeyError, match="\u05d0"): df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError def test_loc_getitem_dups(self): # GH 5678 # repeated getitems on a dup index returning a ndarray df = DataFrame( np.random.random_sample((20, 5)), index=["ABCDE"[x % 5] for x in range(20)] ) expected = df.loc["A", 0] result = df.loc[:, 0].loc["A"] tm.assert_series_equal(result, expected) def test_loc_getitem_dups2(self): # GH4726 # dup indexing with iloc/loc df = DataFrame( [[1, 2, "foo", "bar", Timestamp("20130101")]], columns=["a", "a", "a", "a", "a"], index=[1], ) expected = Series( [1, 2, "foo", "bar", Timestamp("20130101")], index=["a", "a", "a", "a", "a"], name=1, ) result = df.iloc[0] tm.assert_series_equal(result, expected) result = df.loc[1] tm.assert_series_equal(result, expected) def test_loc_setitem_dups(self): # GH 6541 df_orig = DataFrame( { "me": list("rttti"), "foo": list("aaade"), "bar": np.arange(5, dtype="float64") * 1.34 + 2, "bar2": np.arange(5, dtype="float64") * -0.34 + 2, } ).set_index("me") indexer = ( "r", ["bar", "bar2"], ) df = df_orig.copy() df.loc[indexer] *= 2.0 tm.assert_series_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) indexer = ( "r", "bar", ) df = df_orig.copy() df.loc[indexer] *= 2.0 assert df.loc[indexer] == 2.0 * df_orig.loc[indexer] indexer = ( "t", ["bar", "bar2"], ) df = df_orig.copy() df.loc[indexer] *= 2.0 tm.assert_frame_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) def test_loc_setitem_slice(self): # GH10503 # assigning the same type should not change the type df1 = DataFrame({"a": [0, 1, 1], "b": Series([100, 200, 300], dtype="uint32")}) ix = df1["a"] == 1 newb1 = df1.loc[ix, "b"] + 1 df1.loc[ix, "b"] = newb1 expected = DataFrame( {"a": [0, 1, 1], "b": Series([100, 201, 301], dtype="uint32")} ) tm.assert_frame_equal(df1, expected) # assigning a new type should get the inferred type df2 = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64") ix = df1["a"] == 1 newb2 = df2.loc[ix, "b"] df1.loc[ix, "b"] = newb2 expected = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64") tm.assert_frame_equal(df2, expected) def test_loc_setitem_dtype(self): # GH31340 df = DataFrame({"id": ["A"], "a": [1.2], "b": [0.0], "c": [-2.5]}) cols = ["a", "b", "c"] df.loc[:, cols] = df.loc[:, cols].astype("float32") expected = DataFrame( {"id": ["A"], "a": [1.2], "b": [0.0], "c": [-2.5]}, dtype="float32" ) # id is inferred as object tm.assert_frame_equal(df, expected) def test_getitem_label_list_with_missing(self): s = Series(range(3), index=["a", "b", "c"]) # consistency with pytest.raises(KeyError, match="with any missing labels"): s[["a", "d"]] s = Series(range(3)) with pytest.raises(KeyError, match="with any missing labels"): s[[0, 3]] @pytest.mark.parametrize("index", [[True, False], [True, False, True, False]]) def test_loc_getitem_bool_diff_len(self, index): # GH26658 s = Series([1, 2, 3]) msg = f"Boolean index has wrong length: {len(index)} instead of {len(s)}" with pytest.raises(IndexError, match=msg): _ = s.loc[index] def test_loc_getitem_int_slice(self): # TODO: test something here? pass def test_loc_to_fail(self): # GH3449 df = DataFrame( np.random.random((3, 3)), index=["a", "b", "c"], columns=["e", "f", "g"] ) # raise a KeyError? msg = ( r"\"None of \[Int64Index\(\[1, 2\], dtype='int64'\)\] are " r"in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): df.loc[[1, 2], [1, 2]] # GH 7496 # loc should not fallback s = Series(dtype=object) s.loc[1] = 1 s.loc["a"] = 2 with pytest.raises(KeyError, match=r"^-1$"): s.loc[-1] msg = ( r"\"None of \[Int64Index\(\[-1, -2\], dtype='int64'\)\] are " r"in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): s.loc[[-1, -2]] msg = r"\"None of \[Index\(\['4'\], dtype='object'\)\] are in the \[index\]\"" with pytest.raises(KeyError, match=msg): s.loc[["4"]] s.loc[-1] = 3 with pytest.raises(KeyError, match="with any missing labels"): s.loc[[-1, -2]] s["a"] = 2 msg = ( r"\"None of \[Int64Index\(\[-2\], dtype='int64'\)\] are " r"in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): s.loc[[-2]] del s["a"] with pytest.raises(KeyError, match=msg): s.loc[[-2]] = 0 # inconsistency between .loc[values] and .loc[values,:] # GH 7999 df = DataFrame([["a"], ["b"]], index=[1, 2], columns=["value"]) msg = ( r"\"None of \[Int64Index\(\[3\], dtype='int64'\)\] are " r"in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): df.loc[[3], :] with pytest.raises(KeyError, match=msg): df.loc[[3]] def test_loc_getitem_list_with_fail(self): # 15747 # should KeyError if *any* missing labels s = Series([1, 2, 3]) s.loc[[2]] with pytest.raises( KeyError, match=re.escape( "\"None of [Int64Index([3], dtype='int64')] are in the [index]\"" ), ): s.loc[[3]] # a non-match and a match with pytest.raises(KeyError, match="with any missing labels"): s.loc[[2, 3]] def test_loc_index(self): # gh-17131 # a boolean index should index like a boolean numpy array df = DataFrame( np.random.random(size=(5, 10)), index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"], ) mask = df.index.map(lambda x: "alpha" in x) expected = df.loc[np.array(mask)] result = df.loc[mask] tm.assert_frame_equal(result, expected) result = df.loc[mask.values] tm.assert_frame_equal(result, expected) result = df.loc[pd.array(mask, dtype="boolean")] tm.assert_frame_equal(result, expected) def test_loc_general(self): df = DataFrame( np.random.rand(4, 4), columns=["A", "B", "C", "D"], index=["A", "B", "C", "D"], ) # want this to work result = df.loc[:, "A":"B"].iloc[0:2, :] assert (result.columns == ["A", "B"]).all() assert (result.index == ["A", "B"]).all() # mixed type result = DataFrame({"a": [Timestamp("20130101")], "b": [1]}).iloc[0] expected = Series([Timestamp("20130101"), 1], index=["a", "b"], name=0) tm.assert_series_equal(result, expected) assert result.dtype == object def test_loc_setitem_consistency(self): # GH 6149 # coerce similarly for setitem and loc when rows have a null-slice expected = DataFrame( { "date": Series(0, index=range(5), dtype=np.int64), "val": Series(range(5), dtype=np.int64), } ) df = DataFrame( { "date": date_range("2000-01-01", "2000-01-5"), "val": Series(range(5), dtype=np.int64), } ) df.loc[:, "date"] = 0 tm.assert_frame_equal(df, expected) df = DataFrame( { "date": date_range("2000-01-01", "2000-01-5"), "val": Series(range(5), dtype=np.int64), } ) df.loc[:, "date"] = np.array(0, dtype=np.int64) tm.assert_frame_equal(df, expected) df = DataFrame( { "date": date_range("2000-01-01", "2000-01-5"), "val": Series(range(5), dtype=np.int64), } ) df.loc[:, "date"] = np.array([0, 0, 0, 0, 0], dtype=np.int64) tm.assert_frame_equal(df, expected) expected = DataFrame( { "date": Series("foo", index=range(5)), "val": Series(range(5), dtype=np.int64), } ) df = DataFrame( { "date": date_range("2000-01-01", "2000-01-5"), "val": Series(range(5), dtype=np.int64), } ) df.loc[:, "date"] = "foo" tm.assert_frame_equal(df, expected) expected = DataFrame( { "date": Series(1.0, index=range(5)), "val": Series(range(5), dtype=np.int64), } ) df = DataFrame( { "date": date_range("2000-01-01", "2000-01-5"), "val": Series(range(5), dtype=np.int64), } ) df.loc[:, "date"] = 1.0 tm.assert_frame_equal(df, expected) # GH 15494 # setting on frame with single row df = DataFrame({"date": Series([Timestamp("20180101")])}) df.loc[:, "date"] = "string" expected = DataFrame({"date": Series(["string"])}) tm.assert_frame_equal(df, expected) def test_loc_setitem_consistency_empty(self): # empty (essentially noops) expected = DataFrame(columns=["x", "y"]) expected["x"] = expected["x"].astype(np.int64) df = DataFrame(columns=["x", "y"]) df.loc[:, "x"] = 1 tm.assert_frame_equal(df, expected) df = DataFrame(columns=["x", "y"]) df["x"] = 1 tm.assert_frame_equal(df, expected) def test_loc_setitem_consistency_slice_column_len(self): # .loc[:,column] setting with slice == len of the column # GH10408 data = """Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse Region,Site,RespondentID,,,,, Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes, Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes, Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No""" df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2]) df.loc[:, ("Respondent", "StartDate")] = pd.to_datetime( df.loc[:, ("Respondent", "StartDate")] ) df.loc[:, ("Respondent", "EndDate")] = pd.to_datetime( df.loc[:, ("Respondent", "EndDate")] ) df.loc[:, ("Respondent", "Duration")] = ( df.loc[:, ("Respondent", "EndDate")] - df.loc[:, ("Respondent", "StartDate")] ) df.loc[:, ("Respondent", "Duration")] = df.loc[ :, ("Respondent", "Duration") ].astype("timedelta64[s]") expected = Series( [1380, 720, 840, 2160.0], index=df.index, name=("Respondent", "Duration") ) tm.assert_series_equal(df[("Respondent", "Duration")], expected) @pytest.mark.parametrize("unit", ["Y", "M", "D", "h", "m", "s", "ms", "us"]) def test_loc_assign_non_ns_datetime(self, unit): # GH 27395, non-ns dtype assignment via .loc should work # and return the same result when using simple assignment df = DataFrame( { "timestamp": [ np.datetime64("2017-02-11 12:41:29"), np.datetime64("1991-11-07 04:22:37"), ] } ) df.loc[:, unit] = df.loc[:, "timestamp"].values.astype(f"datetime64[{unit}]") df["expected"] = df.loc[:, "timestamp"].values.astype(f"datetime64[{unit}]") expected = Series(df.loc[:, "expected"], name=unit) tm.assert_series_equal(df.loc[:, unit], expected) def test_loc_modify_datetime(self): # see gh-28837 df = DataFrame.from_dict( {"date": [1485264372711, 1485265925110, 1540215845888, 1540282121025]} ) df["date_dt"] = pd.to_datetime(df["date"], unit="ms", cache=True) df.loc[:, "date_dt_cp"] = df.loc[:, "date_dt"] df.loc[[2, 3], "date_dt_cp"] = df.loc[[2, 3], "date_dt"] expected = DataFrame( [ [1485264372711, "2017-01-24 13:26:12.711", "2017-01-24 13:26:12.711"], [1485265925110, "2017-01-24 13:52:05.110", "2017-01-24 13:52:05.110"], [1540215845888, "2018-10-22 13:44:05.888", "2018-10-22 13:44:05.888"], [1540282121025, "2018-10-23 08:08:41.025", "2018-10-23 08:08:41.025"], ], columns=["date", "date_dt", "date_dt_cp"], ) columns = ["date_dt", "date_dt_cp"] expected[columns] = expected[columns].apply(pd.to_datetime) tm.assert_frame_equal(df, expected) def test_loc_setitem_frame(self): df = DataFrame(np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD")) result = df.iloc[0, 0] df.loc["a", "A"] = 1 result = df.loc["a", "A"] assert result == 1 result = df.iloc[0, 0] assert result == 1 df.loc[:, "B":"D"] = 0 expected = df.loc[:, "B":"D"] result = df.iloc[:, 1:] tm.assert_frame_equal(result, expected) # GH 6254 # setting issue df = DataFrame(index=[3, 5, 4], columns=["A"]) df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") expected = DataFrame({"A": Series([1, 2, 3], index=[4, 3, 5])}).reindex( index=[3, 5, 4] ) tm.assert_frame_equal(df, expected) # GH 6252 # setting with an empty frame keys1 = ["@" + str(i) for i in range(5)] val1 = np.arange(5, dtype="int64") keys2 = ["@" + str(i) for i in range(4)] val2 = np.arange(4, dtype="int64") index = list(set(keys1).union(keys2)) df = DataFrame(index=index) df["A"] = np.nan df.loc[keys1, "A"] = val1 df["B"] = np.nan df.loc[keys2, "B"] = val2 expected = DataFrame( {"A": Series(val1, index=keys1), "B": Series(val2, index=keys2)} ).reindex(index=index) tm.assert_frame_equal(df, expected) # GH 8669 # invalid coercion of nan -> int df = DataFrame({"A": [1, 2, 3], "B": np.nan}) df.loc[df.B > df.A, "B"] = df.A expected = DataFrame({"A": [1, 2, 3], "B": np.nan}) tm.assert_frame_equal(df, expected) # GH 6546 # setting with mixed labels df = DataFrame({1: [1, 2], 2: [3, 4], "a": ["a", "b"]}) result = df.loc[0, [1, 2]] expected = Series([1, 3], index=[1, 2], dtype=object, name=0) tm.assert_series_equal(result, expected) expected = DataFrame({1: [5, 2], 2: [6, 4], "a": ["a", "b"]}) df.loc[0, [1, 2]] = [5, 6] tm.assert_frame_equal(df, expected) def test_loc_setitem_frame_multiples(self): # multiple setting df = DataFrame( {"A": ["foo", "bar", "baz"], "B": Series(range(3), dtype=np.int64)} ) rhs = df.loc[1:2] rhs.index = df.index[0:2] df.loc[0:1] = rhs expected = DataFrame( {"A": ["bar", "baz", "baz"], "B": Series([1, 2, 2], dtype=np.int64)} ) tm.assert_frame_equal(df, expected) # multiple setting with frame on rhs (with M8) df = DataFrame( { "date": date_range("2000-01-01", "2000-01-5"), "val": Series(range(5), dtype=np.int64), } ) expected = DataFrame( { "date": [ Timestamp("20000101"), Timestamp("20000102"), Timestamp("20000101"), Timestamp("20000102"), Timestamp("20000103"), ], "val": Series([0, 1, 0, 1, 2], dtype=np.int64), } ) rhs = df.loc[0:2] rhs.index = df.index[2:5] df.loc[2:4] = rhs tm.assert_frame_equal(df, expected) @pytest.mark.parametrize( "indexer", [["A"], slice(None, "A", None), np.array(["A"])] ) @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])]) def test_loc_setitem_with_scalar_index(self, indexer, value): # GH #19474 # assigning like "df.loc[0, ['A']] = ['Z']" should be evaluated # elementwisely, not using "setter('A', ['Z'])". df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) df.loc[0, indexer] = value result = df.loc[0, "A"] assert is_scalar(result) and result == "Z" @pytest.mark.parametrize( "index,box,expected", [ ( ([0, 2], ["A", "B", "C", "D"]), 7, DataFrame( [[7, 7, 7, 7], [3, 4, np.nan, np.nan], [7, 7, 7, 7]], columns=["A", "B", "C", "D"], ), ), ( (1, ["C", "D"]), [7, 8], DataFrame( [[1, 2, np.nan, np.nan], [3, 4, 7, 8], [5, 6, np.nan, np.nan]], columns=["A", "B", "C", "D"], ), ), ( (1, ["A", "B", "C"]), np.array([7, 8, 9], dtype=np.int64), DataFrame( [[1, 2, np.nan], [7, 8, 9], [5, 6, np.nan]], columns=["A", "B", "C"] ), ), ( (slice(1, 3, None), ["B", "C", "D"]), [[7, 8, 9], [10, 11, 12]], DataFrame( [[1, 2, np.nan, np.nan], [3, 7, 8, 9], [5, 10, 11, 12]], columns=["A", "B", "C", "D"], ), ), ( (slice(1, 3, None), ["C", "A", "D"]), np.array([[7, 8, 9], [10, 11, 12]], dtype=np.int64), DataFrame( [[1, 2, np.nan, np.nan], [8, 4, 7, 9], [11, 6, 10, 12]], columns=["A", "B", "C", "D"], ), ), ( (slice(None, None, None), ["A", "C"]), DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]), DataFrame( [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"] ), ), ], ) def test_loc_setitem_missing_columns(self, index, box, expected): # GH 29334 df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"]) df.loc[index] = box tm.assert_frame_equal(df, expected) def test_loc_coercion(self): # 12411 df = DataFrame({"date": [Timestamp("20130101").tz_localize("UTC"), pd.NaT]}) expected = df.dtypes result = df.iloc[[0]] tm.assert_series_equal(result.dtypes, expected) result = df.iloc[[1]] tm.assert_series_equal(result.dtypes, expected) # 12045 import datetime df = DataFrame( {"date": [datetime.datetime(2012, 1, 1), datetime.datetime(1012, 1, 2)]} ) expected = df.dtypes result = df.iloc[[0]] tm.assert_series_equal(result.dtypes, expected) result = df.iloc[[1]] tm.assert_series_equal(result.dtypes, expected) # 11594 df = DataFrame({"text": ["some words"] + [None] * 9}) expected = df.dtypes result = df.iloc[0:2] tm.assert_series_equal(result.dtypes, expected) result = df.iloc[3:] tm.assert_series_equal(result.dtypes, expected) def test_setitem_new_key_tz(self): # GH#12862 should not raise on assigning the second value vals = [ pd.to_datetime(42).tz_localize("UTC"), pd.to_datetime(666).tz_localize("UTC"), ] expected = Series(vals, index=["foo", "bar"]) ser = Series(dtype=object) ser["foo"] = vals[0] ser["bar"] = vals[1] tm.assert_series_equal(ser, expected) ser = Series(dtype=object) ser.loc["foo"] = vals[0] ser.loc["bar"] = vals[1] tm.assert_series_equal(ser, expected) def test_loc_non_unique(self): # GH3659 # non-unique indexer with loc slice # https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs # these are going to raise because the we are non monotonic df = DataFrame( {"A": [1, 2, 3, 4, 5, 6], "B": [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3] ) msg = "'Cannot get left slice bound for non-unique label: 1'" with pytest.raises(KeyError, match=msg): df.loc[1:] msg = "'Cannot get left slice bound for non-unique label: 0'" with pytest.raises(KeyError, match=msg): df.loc[0:] msg = "'Cannot get left slice bound for non-unique label: 1'" with pytest.raises(KeyError, match=msg): df.loc[1:2] # monotonic are ok df = DataFrame( {"A": [1, 2, 3, 4, 5, 6], "B": [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3] ).sort_index(axis=0) result = df.loc[1:] expected = DataFrame({"A": [2, 4, 5, 6], "B": [4, 6, 7, 8]}, index=[1, 1, 2, 3]) tm.assert_frame_equal(result, expected) result = df.loc[0:] tm.assert_frame_equal(result, df) result = df.loc[1:2] expected = DataFrame({"A": [2, 4, 5], "B": [4, 6, 7]}, index=[1, 1, 2]) tm.assert_frame_equal(result, expected) @pytest.mark.arm_slow def test_loc_non_unique_memory_error(self): # GH 4280 # non_unique index with a large selection triggers a memory error columns = list("ABCDEFG") def gen_test(length, l2): return pd.concat( [ DataFrame( np.random.randn(length, len(columns)), index=np.arange(length), columns=columns, ), DataFrame( np.ones((l2, len(columns))), index=[0] * l2, columns=columns ), ] ) def gen_expected(df, mask): len_mask = len(mask) return pd.concat( [ df.take([0]), DataFrame( np.ones((len_mask, len(columns))), index=[0] * len_mask, columns=columns, ), df.take(mask[1:]), ] ) df = gen_test(900, 100) assert df.index.is_unique is False mask = np.arange(100) result = df.loc[mask] expected = gen_expected(df, mask) tm.assert_frame_equal(result, expected) df = gen_test(900000, 100000) assert df.index.is_unique is False mask = np.arange(100000) result = df.loc[mask] expected = gen_expected(df, mask) tm.assert_frame_equal(result, expected) def test_loc_name(self): # GH 3880 df = DataFrame([[1, 1], [1, 1]]) df.index.name = "index_name" result = df.iloc[[0, 1]].index.name assert result == "index_name" result = df.loc[[0, 1]].index.name assert result == "index_name" def test_loc_empty_list_indexer_is_ok(self): df = tm.makeCustomDataframe(5, 2) # vertical empty tm.assert_frame_equal( df.loc[:, []], df.iloc[:, :0], check_index_type=True, check_column_type=True ) # horizontal empty tm.assert_frame_equal( df.loc[[], :], df.iloc[:0, :], check_index_type=True, check_column_type=True ) # horizontal empty tm.assert_frame_equal( df.loc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True ) def test_identity_slice_returns_new_object(self): # GH13873 original_df = DataFrame({"a": [1, 2, 3]}) sliced_df = original_df.loc[:] assert sliced_df is not original_df assert original_df[:] is not original_df # should be a shallow copy original_df["a"] = [4, 4, 4] assert (sliced_df["a"] == 4).all() # These should not return copies assert original_df is original_df.loc[:, :] df = DataFrame(np.random.randn(10, 4)) assert df[0] is df.loc[:, 0] # Same tests for Series original_series = Series([1, 2, 3, 4, 5, 6]) sliced_series = original_series.loc[:] assert sliced_series is not original_series assert original_series[:] is not original_series original_series[:3] = [7, 8, 9] assert all(sliced_series[:3] == [7, 8, 9]) @pytest.mark.xfail(reason="accidental fix reverted - GH37497") def test_loc_copy_vs_view(self): # GH 15631 x = DataFrame(zip(range(3), range(3)), columns=["a", "b"]) y = x.copy() q = y.loc[:, "a"] q += 2 tm.assert_frame_equal(x, y) z = x.copy() q = z.loc[x.index, "a"] q += 2 tm.assert_frame_equal(x, z) def test_loc_uint64(self): # GH20722 # Test whether loc accept uint64 max value as index. s = Series([1, 2], index=[np.iinfo("uint64").max - 1, np.iinfo("uint64").max]) result = s.loc[np.iinfo("uint64").max - 1] expected = s.iloc[0] assert result == expected result = s.loc[[np.iinfo("uint64").max - 1]] expected = s.iloc[[0]] tm.assert_series_equal(result, expected) result = s.loc[[np.iinfo("uint64").max - 1, np.iinfo("uint64").max]] tm.assert_series_equal(result, s) def test_loc_setitem_empty_append_expands_rows(self): # GH6173, various appends to an empty dataframe data = [1, 2, 3] expected = DataFrame({"x": data, "y": [None] * len(data)}) # appends to fit length of data df = DataFrame(columns=["x", "y"]) df.loc[:, "x"] = data tm.assert_frame_equal(df, expected) def test_loc_setitem_empty_append_expands_rows_mixed_dtype(self): # GH#37932 same as test_loc_setitem_empty_append_expands_rows # but with mixed dtype so we go through take_split_path data = [1, 2, 3] expected = DataFrame({"x": data, "y": [None] * len(data)}) df = DataFrame(columns=["x", "y"]) df["x"] = df["x"].astype(np.int64) df.loc[:, "x"] = data tm.assert_frame_equal(df, expected) def test_loc_setitem_empty_append_single_value(self): # only appends one value expected = DataFrame({"x": [1.0], "y": [np.nan]}) df = DataFrame(columns=["x", "y"], dtype=float) df.loc[0, "x"] = expected.loc[0, "x"] tm.assert_frame_equal(df, expected) @pytest.mark.xfail(is_numpy_dev, reason="gh-35481") def test_loc_setitem_empty_append_raises(self): # GH6173, various appends to an empty dataframe data = [1, 2] df = DataFrame(columns=["x", "y"]) df.index = df.index.astype(np.int64) msg = ( r"None of \[Int64Index\(\[0, 1\], dtype='int64'\)\] " r"are in the \[index\]" ) with pytest.raises(KeyError, match=msg): df.loc[[0, 1], "x"] = data msg = "cannot copy sequence with size 2 to array axis with dimension 0" with pytest.raises(ValueError, match=msg): df.loc[0:2, "x"] = data def test_indexing_zerodim_np_array(self): # GH24924 df = DataFrame([[1, 2], [3, 4]]) result = df.loc[np.array(0)] s = Series([1, 2], name=0) tm.assert_series_equal(result, s) def test_series_indexing_zerodim_np_array(self): # GH24924 s = Series([1, 2]) result = s.loc[np.array(0)] assert result == 1 def test_loc_reverse_assignment(self): # GH26939 data = [1, 2, 3, 4, 5, 6] + [None] * 4 expected = Series(data, index=range(2010, 2020)) result = Series(index=range(2010, 2020), dtype=np.float64) result.loc[2015:2010:-1] = [6, 5, 4, 3, 2, 1] tm.assert_series_equal(result, expected) def test_loc_setitem_str_to_small_float_conversion_type(self): # GH#20388 np.random.seed(13) col_data = [str(np.random.random() * 1e-12) for _ in range(5)] result = DataFrame(col_data, columns=["A"]) expected = DataFrame(col_data, columns=["A"], dtype=object) tm.assert_frame_equal(result, expected) # change the dtype of the elements from object to float one by one result.loc[result.index, "A"] = [float(x) for x in col_data] expected = DataFrame(col_data, columns=["A"], dtype=float) tm.assert_frame_equal(result, expected) def test_loc_getitem_time_object(self, frame_or_series): rng = date_range("1/1/2000", "1/5/2000", freq="5min") mask = (rng.hour == 9) & (rng.minute == 30) obj = DataFrame(np.random.randn(len(rng), 3), index=rng) if frame_or_series is Series: obj = obj[0] result = obj.loc[time(9, 30)] exp = obj.loc[mask] tm.assert_equal(result, exp) chunk = obj.loc["1/4/2000":] result = chunk.loc[time(9, 30)] expected = result[-1:] # Without resetting the freqs, these are 5 min and 1440 min, respectively result.index = result.index._with_freq(None) expected.index = expected.index._with_freq(None) tm.assert_equal(result, expected) @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"]) @pytest.mark.parametrize("dtype", [np.int64, np.float64, complex]) @td.skip_if_no_scipy def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype): import scipy.sparse spmatrix_t = getattr(scipy.sparse, spmatrix_t) # The bug is triggered by a sparse matrix with purely sparse columns. So the # recipe below generates a rectangular matrix of dimension (5, 7) where all the # diagonal cells are ones, meaning the last two columns are purely sparse. rows, cols = 5, 7 spmatrix = spmatrix_t(np.eye(rows, cols, dtype=dtype), dtype=dtype) df = DataFrame.sparse.from_spmatrix(spmatrix) # regression test for GH#34526 itr_idx = range(2, rows) result = df.loc[itr_idx].values expected = spmatrix.toarray()[itr_idx] tm.assert_numpy_array_equal(result, expected) # regression test for GH#34540 result = df.loc[itr_idx].dtypes.values expected = np.full(cols, SparseDtype(dtype, fill_value=0)) tm.assert_numpy_array_equal(result, expected) def test_loc_getitem_listlike_all_retains_sparse(self): df = DataFrame({"A": pd.array([0, 0], dtype=SparseDtype("int64"))}) result = df.loc[[0, 1]] tm.assert_frame_equal(result, df) @pytest.mark.parametrize("key_type", [iter, np.array, Series, Index]) def test_loc_getitem_iterable(self, float_frame, key_type): idx = key_type(["A", "B", "C"]) result = float_frame.loc[:, idx] expected = float_frame.loc[:, ["A", "B", "C"]] tm.assert_frame_equal(result, expected) def test_loc_getitem_timedelta_0seconds(self): # GH#10583 df = DataFrame(np.random.normal(size=(10, 4))) df.index = timedelta_range(start="0s", periods=10, freq="s") expected = df.loc[Timedelta("0s") :, :] result = df.loc["0s":, :] tm.assert_frame_equal(expected, result) @pytest.mark.parametrize( "val,expected", [(2 ** 63 - 1, Series([1])), (2 ** 63, Series([2]))] ) def test_loc_getitem_uint64_scalar(self, val, expected): # see GH#19399 df = DataFrame([1, 2], index=[2 ** 63 - 1, 2 ** 63]) result = df.loc[val] expected.name = val tm.assert_series_equal(result, expected) def test_loc_setitem_int_label_with_float64index(self): # note labels are floats ser = Series(["a", "b", "c"], index=[0, 0.5, 1]) tmp = ser.copy() ser.loc[1] = "zoo" tmp.iloc[2] = "zoo" tm.assert_series_equal(ser, tmp) @pytest.mark.parametrize( "indexer, expected", [ # The test name is a misnomer in the 0 case as df.index[indexer] # is a scalar. (0, [20, 1, 2, 3, 4, 5, 6, 7, 8, 9]), (slice(4, 8), [0, 1, 2, 3, 20, 20, 20, 20, 8, 9]), ([3, 5], [0, 1, 2, 20, 4, 20, 6, 7, 8, 9]), ], ) def test_loc_setitem_listlike_with_timedelta64index(self, indexer, expected): # GH#16637 tdi = to_timedelta(range(10), unit="s") df = DataFrame({"x": range(10)}, dtype="int64", index=tdi) df.loc[df.index[indexer], "x"] = 20 expected = DataFrame( expected, index=tdi, columns=["x"], dtype="int64", ) tm.assert_frame_equal(expected, df) class TestLocWithMultiIndex: @pytest.mark.parametrize( "keys, expected", [ (["b", "a"], [["b", "b", "a", "a"], [1, 2, 1, 2]]), (["a", "b"], [["a", "a", "b", "b"], [1, 2, 1, 2]]), ((["a", "b"], [1, 2]), [["a", "a", "b", "b"], [1, 2, 1, 2]]), ((["a", "b"], [2, 1]), [["a", "a", "b", "b"], [2, 1, 2, 1]]), ((["b", "a"], [2, 1]), [["b", "b", "a", "a"], [2, 1, 2, 1]]), ((["b", "a"], [1, 2]), [["b", "b", "a", "a"], [1, 2, 1, 2]]), ((["c", "a"], [2, 1]), [["c", "a", "a"], [1, 2, 1]]), ], ) @pytest.mark.parametrize("dim", ["index", "columns"]) def test_loc_getitem_multilevel_index_order(self, dim, keys, expected): # GH#22797 # Try to respect order of keys given for MultiIndex.loc kwargs = {dim: [["c", "a", "a", "b", "b"], [1, 1, 2, 1, 2]]} df = DataFrame(np.arange(25).reshape(5, 5), **kwargs) exp_index = MultiIndex.from_arrays(expected) if dim == "index": res = df.loc[keys, :] tm.assert_index_equal(res.index, exp_index) elif dim == "columns": res = df.loc[:, keys] tm.assert_index_equal(res.columns, exp_index) def test_loc_preserve_names(self, multiindex_year_month_day_dataframe_random_data): ymd = multiindex_year_month_day_dataframe_random_data result = ymd.loc[2000] result2 = ymd["A"].loc[2000] assert result.index.names == ymd.index.names[1:] assert result2.index.names == ymd.index.names[1:] result = ymd.loc[2000, 2] result2 = ymd["A"].loc[2000, 2] assert result.index.name == ymd.index.names[2] assert result2.index.name == ymd.index.names[2] def test_loc_getitem_multiindex_nonunique_len_zero(self): # GH#13691 mi = MultiIndex.from_product([[0], [1, 1]]) ser = Series(0, index=mi) res = ser.loc[[]] expected = ser[:0] tm.assert_series_equal(res, expected) res2 = ser.loc[ser.iloc[0:0]] tm.assert_series_equal(res2, expected) def test_loc_getitem_access_none_value_in_multiindex(self): # GH#34318: test that you can access a None value using .loc # through a Multiindex ser = Series([None], pd.MultiIndex.from_arrays([["Level1"], ["Level2"]])) result = ser.loc[("Level1", "Level2")] assert result is None midx = MultiIndex.from_product([["Level1"], ["Level2_a", "Level2_b"]]) ser = Series([None] * len(midx), dtype=object, index=midx) result = ser.loc[("Level1", "Level2_a")] assert result is None ser = Series([1] * len(midx), dtype=object, index=midx) result = ser.loc[("Level1", "Level2_a")] assert result == 1 def test_loc_setitem_multiindex_slice(self): # GH 34870 index = pd.MultiIndex.from_tuples( zip( ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], ["one", "two", "one", "two", "one", "two", "one", "two"], ), names=["first", "second"], ) result = Series([1, 1, 1, 1, 1, 1, 1, 1], index=index) result.loc[("baz", "one"):("foo", "two")] = 100 expected = Series([1, 1, 100, 100, 100, 100, 1, 1], index=index) tm.assert_series_equal(result, expected) def test_loc_getitem_slice_datetime_objs_with_datetimeindex(self): times = date_range("2000-01-01", freq="10min", periods=100000) ser = Series(range(100000), times) result = ser.loc[datetime(1900, 1, 1) : datetime(2100, 1, 1)] tm.assert_series_equal(result, ser) def test_loc_getitem_sorted_index_level_with_duplicates(self): # GH#4516 sorting a MultiIndex with duplicates and multiple dtypes mi = MultiIndex.from_tuples( [ ("foo", "bar"), ("foo", "bar"), ("bah", "bam"), ("bah", "bam"), ("foo", "bar"), ("bah", "bam"), ], names=["A", "B"], ) df = DataFrame( [ [1.0, 1], [2.0, 2], [3.0, 3], [4.0, 4], [5.0, 5], [6.0, 6], ], index=mi, columns=["C", "D"], ) df = df.sort_index(level=0) expected = DataFrame( [[1.0, 1], [2.0, 2], [5.0, 5]], columns=["C", "D"], index=mi.take([0, 1, 4]) ) result = df.loc[("foo", "bar")] tm.assert_frame_equal(result, expected) class TestLocSetitemWithExpansion: @pytest.mark.slow def test_loc_setitem_with_expansion_large_dataframe(self): # GH#10692 result = DataFrame({"x": range(10 ** 6)}, dtype="int64") result.loc[len(result)] = len(result) + 1 expected = DataFrame({"x": range(10 ** 6 + 1)}, dtype="int64") tm.assert_frame_equal(result, expected) def test_loc_setitem_empty_series(self): # GH#5226 # partially set with an empty object series ser = Series(dtype=object) ser.loc[1] = 1 tm.assert_series_equal(ser, Series([1], index=[1])) ser.loc[3] = 3 tm.assert_series_equal(ser, Series([1, 3], index=[1, 3])) ser = Series(dtype=object) ser.loc[1] = 1.0 tm.assert_series_equal(ser, Series([1.0], index=[1])) ser.loc[3] = 3.0 tm.assert_series_equal(ser, Series([1.0, 3.0], index=[1, 3])) ser = Series(dtype=object) ser.loc["foo"] = 1 tm.assert_series_equal(ser, Series([1], index=["foo"])) ser.loc["bar"] = 3 tm.assert_series_equal(ser, Series([1, 3], index=["foo", "bar"])) ser.loc[3] = 4 tm.assert_series_equal(ser, Series([1, 3, 4], index=["foo", "bar", 3])) def test_loc_setitem_incremental_with_dst(self): # GH#20724 base = datetime(2015, 11, 1, tzinfo=gettz("US/Pacific")) idxs = [base + timedelta(seconds=i * 900) for i in range(16)] result = Series([0], index=[idxs[0]]) for ts in idxs: result.loc[ts] = 1 expected = Series(1, index=idxs) tm.assert_series_equal(result, expected) def test_loc_setitem_datetime_keys_cast(self): # GH#9516 dt1 = Timestamp("20130101 09:00:00") dt2 = Timestamp("20130101 10:00:00") for conv in [ lambda x: x, lambda x: x.to_datetime64(), lambda x: x.to_pydatetime(), lambda x: np.datetime64(x), ]: df = DataFrame() df.loc[conv(dt1), "one"] = 100 df.loc[conv(dt2), "one"] = 200 expected = DataFrame({"one": [100.0, 200.0]}, index=[dt1, dt2]) tm.assert_frame_equal(df, expected) def test_loc_setitem_categorical_column_retains_dtype(self, ordered): # GH16360 result = DataFrame({"A": [1]}) result.loc[:, "B"] = Categorical(["b"], ordered=ordered) expected = DataFrame({"A": [1], "B": Categorical(["b"], ordered=ordered)}) tm.assert_frame_equal(result, expected) class TestLocCallable: def test_frame_loc_getitem_callable(self): # GH#11485 df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) # iloc cannot use boolean Series (see GH3635) # return bool indexer res = df.loc[lambda x: x.A > 2] tm.assert_frame_equal(res, df.loc[df.A > 2]) res = df.loc[lambda x: x.A > 2] tm.assert_frame_equal(res, df.loc[df.A > 2]) res = df.loc[lambda x: x.A > 2] tm.assert_frame_equal(res, df.loc[df.A > 2]) res = df.loc[lambda x: x.A > 2] tm.assert_frame_equal(res, df.loc[df.A > 2]) res = df.loc[lambda x: x.B == "b", :] tm.assert_frame_equal(res, df.loc[df.B == "b", :]) res = df.loc[lambda x: x.B == "b", :] tm.assert_frame_equal(res, df.loc[df.B == "b", :]) res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) res = df.loc[lambda x: x.A > 2, lambda x: "B"] tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) res = df.loc[lambda x: x.A > 2, lambda x: "B"] tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) # scalar res = df.loc[lambda x: 1, lambda x: "A"] assert res == df.loc[1, "A"] res = df.loc[lambda x: 1, lambda x: "A"] assert res == df.loc[1, "A"] def test_frame_loc_getitem_callable_mixture(self): # GH#11485 df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) res = df.loc[lambda x: x.A > 2, ["A", "B"]] tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) res = df.loc[lambda x: x.A > 2, ["A", "B"]] tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) res = df.loc[[2, 3], lambda x: ["A", "B"]] tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) res = df.loc[[2, 3], lambda x: ["A", "B"]] tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) res = df.loc[3, lambda x: ["A", "B"]] tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) res = df.loc[3, lambda x: ["A", "B"]] tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) def test_frame_loc_getitem_callable_labels(self): # GH#11485 df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) # return label res = df.loc[lambda x: ["A", "C"]] tm.assert_frame_equal(res, df.loc[["A", "C"]]) res = df.loc[lambda x: ["A", "C"]] tm.assert_frame_equal(res, df.loc[["A", "C"]]) res = df.loc[lambda x: ["A", "C"], :] tm.assert_frame_equal(res, df.loc[["A", "C"], :]) res = df.loc[lambda x: ["A", "C"], lambda x: "X"] tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) res = df.loc[lambda x: ["A", "C"], lambda x: ["X"]] tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) # mixture res = df.loc[["A", "C"], lambda x: "X"] tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) res = df.loc[["A", "C"], lambda x: ["X"]] tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) res = df.loc[lambda x: ["A", "C"], "X"] tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) res = df.loc[lambda x: ["A", "C"], ["X"]] tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) def test_frame_loc_setitem_callable(self): # GH#11485 df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) # return label res = df.copy() res.loc[lambda x: ["A", "C"]] = -20 exp = df.copy() exp.loc[["A", "C"]] = -20 tm.assert_frame_equal(res, exp) res = df.copy() res.loc[lambda x: ["A", "C"], :] = 20 exp = df.copy() exp.loc[["A", "C"], :] = 20 tm.assert_frame_equal(res, exp) res = df.copy() res.loc[lambda x: ["A", "C"], lambda x: "X"] = -1 exp = df.copy() exp.loc[["A", "C"], "X"] = -1 tm.assert_frame_equal(res, exp) res = df.copy() res.loc[lambda x: ["A", "C"], lambda x: ["X"]] = [5, 10] exp = df.copy() exp.loc[["A", "C"], ["X"]] = [5, 10] tm.assert_frame_equal(res, exp) # mixture res = df.copy() res.loc[["A", "C"], lambda x: "X"] = np.array([-1, -2]) exp = df.copy() exp.loc[["A", "C"], "X"] = np.array([-1, -2]) tm.assert_frame_equal(res, exp) res = df.copy() res.loc[["A", "C"], lambda x: ["X"]] = 10 exp = df.copy() exp.loc[["A", "C"], ["X"]] = 10 tm.assert_frame_equal(res, exp) res = df.copy() res.loc[lambda x: ["A", "C"], "X"] = -2 exp = df.copy() exp.loc[["A", "C"], "X"] = -2 tm.assert_frame_equal(res, exp) res = df.copy() res.loc[lambda x: ["A", "C"], ["X"]] = -4 exp = df.copy() exp.loc[["A", "C"], ["X"]] = -4 tm.assert_frame_equal(res, exp) class TestPartialStringSlicing: def test_loc_getitem_partial_string_slicing_datetimeindex(self): # GH#35509 df = DataFrame( {"col1": ["a", "b", "c"], "col2": [1, 2, 3]}, index=to_datetime(["2020-08-01", "2020-07-02", "2020-08-05"]), ) expected = DataFrame( {"col1": ["a", "c"], "col2": [1, 3]}, index=to_datetime(["2020-08-01", "2020-08-05"]), ) result = df.loc["2020-08"] tm.assert_frame_equal(result, expected) def test_loc_getitem_partial_string_slicing_with_periodindex(self): pi = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") ser = pi.to_series() result = ser.loc[:"2017-12"] expected = ser.iloc[:-1] tm.assert_series_equal(result, expected) def test_loc_getitem_partial_string_slicing_with_timedeltaindex(self): ix = timedelta_range(start="1 day", end="2 days", freq="1H") ser = ix.to_series() result = ser.loc[:"1 days"] expected = ser.iloc[:-1] tm.assert_series_equal(result, expected) def test_loc_getitem_str_timedeltaindex(self): # GH#16896 df = DataFrame({"x": range(3)}, index=to_timedelta(range(3), unit="days")) expected = df.iloc[0] sliced = df.loc["0 days"] tm.assert_series_equal(sliced, expected) class TestLabelSlicing: def test_loc_getitem_label_slice_across_dst(self): # GH#21846 idx = date_range( "2017-10-29 01:30:00", tz="Europe/Berlin", periods=5, freq="30 min" ) series2 = Series([0, 1, 2, 3, 4], index=idx) t_1 = Timestamp("2017-10-29 02:30:00+02:00", tz="Europe/Berlin", freq="30min") t_2 = Timestamp("2017-10-29 02:00:00+01:00", tz="Europe/Berlin", freq="30min") result = series2.loc[t_1:t_2] expected = Series([2, 3], index=idx[2:4]) tm.assert_series_equal(result, expected) result = series2[t_1] expected = 2 assert result == expected def test_loc_getitem_label_slice_period(self): ix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") ser = ix.to_series() result = ser.loc[: ix[-2]] expected = ser.iloc[:-1] tm.assert_series_equal(result, expected) def test_loc_getitem_label_slice_timedelta64(self): ix = timedelta_range(start="1 day", end="2 days", freq="1H") ser = ix.to_series() result = ser.loc[: ix[-2]] expected = ser.iloc[:-1] tm.assert_series_equal(result, expected) def test_loc_getitem_slice_floats_inexact(self): index = [52195.504153, 52196.303147, 52198.369883] df = DataFrame(np.random.rand(3, 2), index=index) s1 = df.loc[52195.1:52196.5] assert len(s1) == 2 s1 = df.loc[52195.1:52196.6] assert len(s1) == 2 s1 = df.loc[52195.1:52198.9] assert len(s1) == 3 def test_loc_getitem_float_slice_float64index(self): ser = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float)) assert len(ser.loc[12.0:]) == 8 assert len(ser.loc[12.5:]) == 7 idx = np.arange(10, 20, dtype=float) idx[2] = 12.2 ser.index = idx assert len(ser.loc[12.0:]) == 8 assert len(ser.loc[12.5:]) == 7 @pytest.mark.parametrize( "start,stop, expected_slice", [ [np.timedelta64(0, "ns"), None, slice(0, 11)], [np.timedelta64(1, "D"), np.timedelta64(6, "D"), slice(1, 7)], [None, np.timedelta64(4, "D"), slice(0, 5)], ], ) def test_loc_getitem_slice_label_td64obj(self, start, stop, expected_slice): # GH#20393 ser = Series(range(11), timedelta_range("0 days", "10 days")) result = ser.loc[slice(start, stop)] expected = ser.iloc[expected_slice] tm.assert_series_equal(result, expected) @pytest.mark.parametrize("start", ["2018", "2020"]) def test_loc_getitem_slice_unordered_dt_index(self, frame_or_series, start): obj = frame_or_series( [1, 2, 3], index=[Timestamp("2016"), Timestamp("2019"), Timestamp("2017")], ) with tm.assert_produces_warning(FutureWarning): obj.loc[start:"2022"] @pytest.mark.parametrize("value", [1, 1.5]) def test_loc_getitem_slice_labels_int_in_object_index(self, frame_or_series, value): # GH: 26491 obj = frame_or_series(range(4), index=[value, "first", 2, "third"]) result = obj.loc[value:"third"] expected = frame_or_series(range(4), index=[value, "first", 2, "third"]) tm.assert_equal(result, expected) class TestLocBooleanMask: def test_loc_setitem_bool_mask_timedeltaindex(self): # GH#14946 df = DataFrame({"x": range(10)}) df.index = to_timedelta(range(10), unit="s") conditions = [df["x"] > 3, df["x"] == 3, df["x"] < 3] expected_data = [ [0, 1, 2, 3, 10, 10, 10, 10, 10, 10], [0, 1, 2, 10, 4, 5, 6, 7, 8, 9], [10, 10, 10, 3, 4, 5, 6, 7, 8, 9], ] for cond, data in zip(conditions, expected_data): result = df.copy() result.loc[cond, "x"] = 10 expected = DataFrame( data, index=to_timedelta(range(10), unit="s"), columns=["x"], dtype="int64", ) tm.assert_frame_equal(expected, result) def test_loc_setitem_mask_with_datetimeindex_tz(self): # GH#16889 # support .loc with alignment and tz-aware DatetimeIndex mask = np.array([True, False, True, False]) idx = date_range("20010101", periods=4, tz="UTC") df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64") result = df.copy() result.loc[mask, :] = df.loc[mask, :] tm.assert_frame_equal(result, df) result = df.copy() result.loc[mask] = df.loc[mask] tm.assert_frame_equal(result, df) idx = date_range("20010101", periods=4) df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64") result = df.copy() result.loc[mask, :] = df.loc[mask, :] tm.assert_frame_equal(result, df) result = df.copy() result.loc[mask] = df.loc[mask] tm.assert_frame_equal(result, df) def test_loc_setitem_mask_and_label_with_datetimeindex(self): # GH#9478 # a datetimeindex alignment issue with partial setting df = DataFrame( np.arange(6.0).reshape(3, 2), columns=list("AB"), index=date_range("1/1/2000", periods=3, freq="1H"), ) expected = df.copy() expected["C"] = [expected.index[0]] + [pd.NaT, pd.NaT] mask = df.A < 1 df.loc[mask, "C"] = df.loc[mask].index tm.assert_frame_equal(df, expected) def test_loc_setitem_mask_td64_series_value(self): # GH#23462 key list of bools, value is a Series td1 = Timedelta(0) td2 = Timedelta(28767471428571405) df = DataFrame({"col": Series([td1, td2])}) df_copy = df.copy() ser = Series([td1]) expected = df["col"].iloc[1].value df.loc[[True, False]] = ser result = df["col"].iloc[1].value assert expected == result tm.assert_frame_equal(df, df_copy) class TestLocListlike: @pytest.mark.parametrize("box", [lambda x: x, np.asarray, list]) def test_loc_getitem_list_of_labels_categoricalindex_with_na(self, box): # passing a list can include valid categories _or_ NA values ci = CategoricalIndex(["A", "B", np.nan]) ser = Series(range(3), index=ci) result = ser.loc[box(ci)] tm.assert_series_equal(result, ser) result = ser[box(ci)] tm.assert_series_equal(result, ser) result = ser.to_frame().loc[box(ci)] tm.assert_frame_equal(result, ser.to_frame()) ser2 = ser[:-1] ci2 = ci[1:] # but if there are no NAs present, this should raise KeyError msg = ( r"Passing list-likes to .loc or \[\] with any missing labels is no " "longer supported. The following labels were missing: " r"(Categorical)?Index\(\[nan\], .*\). " "See https" ) with pytest.raises(KeyError, match=msg): ser2.loc[box(ci2)] with pytest.raises(KeyError, match=msg): ser2[box(ci2)] with pytest.raises(KeyError, match=msg): ser2.to_frame().loc[box(ci2)] def test_series_loc_getitem_label_list_missing_values(): # gh-11428 key = np.array( ["2001-01-04", "2001-01-02", "2001-01-04", "2001-01-14"], dtype="datetime64" ) s = Series([2, 5, 8, 11], date_range("2001-01-01", freq="D", periods=4)) with pytest.raises(KeyError, match="with any missing labels"): s.loc[key] def test_series_getitem_label_list_missing_integer_values(): # GH: 25927 s = Series( index=np.array([9730701000001104, 10049011000001109]), data=np.array([999000011000001104, 999000011000001104]), ) with pytest.raises(KeyError, match="with any missing labels"): s.loc[np.array([9730701000001104, 10047311000001102])] @pytest.mark.parametrize( "columns, column_key, expected_columns", [ ([2011, 2012, 2013], [2011, 2012], [0, 1]), ([2011, 2012, "All"], [2011, 2012], [0, 1]), ([2011, 2012, "All"], [2011, "All"], [0, 2]), ], ) def test_loc_getitem_label_list_integer_labels(columns, column_key, expected_columns): # gh-14836 df = DataFrame(np.random.rand(3, 3), columns=columns, index=list("ABC")) expected = df.iloc[:, expected_columns] result = df.loc[["A", "B", "C"], column_key] if df.columns.is_object() and all(isinstance(x, int) for x in column_key): expected.columns = expected.columns.astype(int) tm.assert_frame_equal(result, expected, check_column_type=True) def test_loc_setitem_float_intindex(): # GH 8720 rand_data = np.random.randn(8, 4) result = DataFrame(rand_data) result.loc[:, 0.5] = np.nan expected_data = np.hstack((rand_data, np.array([np.nan] * 8).reshape(8, 1))) expected = DataFrame(expected_data, columns=[0.0, 1.0, 2.0, 3.0, 0.5]) tm.assert_frame_equal(result, expected) result = DataFrame(rand_data) result.loc[:, 0.5] = np.nan tm.assert_frame_equal(result, expected) def test_loc_axis_1_slice(): # GH 10586 cols = [(yr, m) for yr in [2014, 2015] for m in [7, 8, 9, 10]] df = DataFrame( np.ones((10, 8)), index=tuple("ABCDEFGHIJ"), columns=pd.MultiIndex.from_tuples(cols), ) result = df.loc(axis=1)[(2014, 9):(2015, 8)] expected = DataFrame( np.ones((10, 4)), index=tuple("ABCDEFGHIJ"), columns=pd.MultiIndex.from_tuples( [(2014, 9), (2014, 10), (2015, 7), (2015, 8)] ), ) tm.assert_frame_equal(result, expected) def test_loc_set_dataframe_multiindex(): # GH 14592 expected = DataFrame( "a", index=range(2), columns=pd.MultiIndex.from_product([range(2), range(2)]) ) result = expected.copy() result.loc[0, [(0, 1)]] = result.loc[0, [(0, 1)]] tm.assert_frame_equal(result, expected) def test_loc_mixed_int_float(): # GH#19456 ser = Series(range(2), pd.Index([1, 2.0], dtype=object)) result = ser.loc[1] assert result == 0 def test_loc_with_positional_slice_deprecation(): # GH#31840 ser = Series(range(4), index=["A", "B", "C", "D"]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): ser.loc[:3] = 2 expected = Series([2, 2, 2, 3], index=["A", "B", "C", "D"]) tm.assert_series_equal(ser, expected) def test_loc_slice_disallows_positional(): # GH#16121, GH#24612, GH#31810 dti = pd.date_range("2016-01-01", periods=3) df = DataFrame(np.random.random((3, 2)), index=dti) ser = df[0] msg = ( "cannot do slice indexing on DatetimeIndex with these " r"indexers \[1\] of type int" ) for obj in [df, ser]: with pytest.raises(TypeError, match=msg): obj.loc[1:3] with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): # GH#31840 deprecated incorrect behavior obj.loc[1:3] = 1 with pytest.raises(TypeError, match=msg): df.loc[1:3, 1] with tm.assert_produces_warning(FutureWarning): # GH#31840 deprecated incorrect behavior df.loc[1:3, 1] = 2 def test_loc_datetimelike_mismatched_dtypes(): # GH#32650 dont mix and match datetime/timedelta/period dtypes df = DataFrame( np.random.randn(5, 3), columns=["a", "b", "c"], index=pd.date_range("2012", freq="H", periods=5), ) # create dataframe with non-unique DatetimeIndex df = df.iloc[[0, 2, 2, 3]].copy() dti = df.index tdi = pd.TimedeltaIndex(dti.asi8) # matching i8 values msg = r"None of \[TimedeltaIndex.* are in the \[index\]" with pytest.raises(KeyError, match=msg): df.loc[tdi] with pytest.raises(KeyError, match=msg): df["a"].loc[tdi] def test_loc_with_period_index_indexer(): # GH#4125 idx = pd.period_range("2002-01", "2003-12", freq="M") df = DataFrame(np.random.randn(24, 10), index=idx) tm.assert_frame_equal(df, df.loc[idx]) tm.assert_frame_equal(df, df.loc[list(idx)]) tm.assert_frame_equal(df, df.loc[list(idx)]) tm.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]]) tm.assert_frame_equal(df, df.loc[list(idx)]) class TestLocSeries: @pytest.mark.parametrize("val,expected", [(2 ** 63 - 1, 3), (2 ** 63, 4)]) def test_loc_uint64(self, val, expected): # see GH#19399 ser = Series({2 ** 63 - 1: 3, 2 ** 63: 4}) assert ser.loc[val] == expected def test_loc_getitem(self, string_series, datetime_series): inds = string_series.index[[3, 4, 7]] tm.assert_series_equal(string_series.loc[inds], string_series.reindex(inds)) tm.assert_series_equal(string_series.iloc[5::2], string_series[5::2]) # slice with indices d1, d2 = datetime_series.index[[5, 15]] result = datetime_series.loc[d1:d2] expected = datetime_series.truncate(d1, d2) tm.assert_series_equal(result, expected) # boolean mask = string_series > string_series.median() tm.assert_series_equal(string_series.loc[mask], string_series[mask]) # ask for index value assert datetime_series.loc[d1] == datetime_series[d1] assert datetime_series.loc[d2] == datetime_series[d2] def test_loc_getitem_not_monotonic(self, datetime_series): d1, d2 = datetime_series.index[[5, 15]] ts2 = datetime_series[::2][[1, 2, 0]] msg = r"Timestamp\('2000-01-10 00:00:00'\)" with pytest.raises(KeyError, match=msg): ts2.loc[d1:d2] with pytest.raises(KeyError, match=msg): ts2.loc[d1:d2] = 0 def test_loc_getitem_setitem_integer_slice_keyerrors(self): ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) # this is OK cp = ser.copy() cp.iloc[4:10] = 0 assert (cp.iloc[4:10] == 0).all() # so is this cp = ser.copy() cp.iloc[3:11] = 0 assert (cp.iloc[3:11] == 0).values.all() result = ser.iloc[2:6] result2 = ser.loc[3:11] expected = ser.reindex([4, 6, 8, 10]) tm.assert_series_equal(result, expected) tm.assert_series_equal(result2, expected) # non-monotonic, raise KeyError s2 = ser.iloc[list(range(5)) + list(range(9, 4, -1))] with pytest.raises(KeyError, match=r"^3$"): s2.loc[3:11] with pytest.raises(KeyError, match=r"^3$"): s2.loc[3:11] = 0 def test_loc_getitem_iterator(self, string_series): idx = iter(string_series.index[:10]) result = string_series.loc[idx] tm.assert_series_equal(result, string_series[:10]) def test_loc_setitem_boolean(self, string_series): mask = string_series > string_series.median() result = string_series.copy() result.loc[mask] = 0 expected = string_series expected[mask] = 0 tm.assert_series_equal(result, expected) def test_loc_setitem_corner(self, string_series): inds = list(string_series.index[[5, 8, 12]]) string_series.loc[inds] = 5 msg = r"\['foo'\] not in index" with pytest.raises(KeyError, match=msg): string_series.loc[inds + ["foo"]] = 5 def test_basic_setitem_with_labels(self, datetime_series): indices = datetime_series.index[[5, 10, 15]] cp = datetime_series.copy() exp = datetime_series.copy() cp[indices] = 0 exp.loc[indices] = 0 tm.assert_series_equal(cp, exp) cp = datetime_series.copy() exp = datetime_series.copy() cp[indices[0] : indices[2]] = 0 exp.loc[indices[0] : indices[2]] = 0 tm.assert_series_equal(cp, exp) def test_loc_setitem_listlike_of_ints(self): # integer indexes, be careful ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) inds = [0, 4, 6] arr_inds = np.array([0, 4, 6]) cp = ser.copy() exp = ser.copy() ser[inds] = 0 ser.loc[inds] = 0 tm.assert_series_equal(cp, exp) cp = ser.copy() exp = ser.copy() ser[arr_inds] = 0 ser.loc[arr_inds] = 0 tm.assert_series_equal(cp, exp) inds_notfound = [0, 4, 5, 6] arr_inds_notfound = np.array([0, 4, 5, 6]) msg = r"\[5\] not in index" with pytest.raises(KeyError, match=msg): ser[inds_notfound] = 0 with pytest.raises(Exception, match=msg): ser[arr_inds_notfound] = 0 def test_loc_setitem_dt64tz_values(self): # GH#12089 ser = Series( date_range("2011-01-01", periods=3, tz="US/Eastern"), index=["a", "b", "c"], ) s2 = ser.copy() expected = Timestamp("2011-01-03", tz="US/Eastern") s2.loc["a"] = expected result = s2.loc["a"] assert result == expected s2 = ser.copy() s2.iloc[0] = expected result = s2.iloc[0] assert result == expected s2 = ser.copy() s2["a"] = expected result = s2["a"] assert result == expected @pytest.mark.parametrize("array_fn", [np.array, pd.array, list, tuple]) @pytest.mark.parametrize("size", [0, 4, 5, 6]) def test_loc_iloc_setitem_with_listlike(self, size, array_fn): # GH37748 # testing insertion, in a Series of size N (here 5), of a listlike object # of size 0, N-1, N, N+1 arr = array_fn([0] * size) expected = Series([arr, 0, 0, 0, 0], index=list("abcde"), dtype=object) ser = Series(0, index=list("abcde"), dtype=object) ser.loc["a"] = arr tm.assert_series_equal(ser, expected) ser = Series(0, index=list("abcde"), dtype=object) ser.iloc[0] = arr tm.assert_series_equal(ser, expected)