from datetime import date, datetime, timedelta from dateutil import tz import numpy as np import pytest import pandas as pd from pandas import DataFrame, Index, Series, Timestamp, date_range import pandas._testing as tm class TestDatetimeIndex: def test_setitem_with_datetime_tz(self): # 16889 # support .loc with alignment and tz-aware DatetimeIndex mask = np.array([True, False, True, False]) idx = date_range("20010101", periods=4, tz="UTC") df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64") result = df.copy() result.loc[mask, :] = df.loc[mask, :] tm.assert_frame_equal(result, df) result = df.copy() result.loc[mask] = df.loc[mask] tm.assert_frame_equal(result, df) idx = date_range("20010101", periods=4) df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64") result = df.copy() result.loc[mask, :] = df.loc[mask, :] tm.assert_frame_equal(result, df) result = df.copy() result.loc[mask] = df.loc[mask] tm.assert_frame_equal(result, df) def test_indexing_with_datetime_tz(self): # GH#8260 # support datetime64 with tz idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo") dr = date_range("20130110", periods=3) df = DataFrame({"A": idx, "B": dr}) df["C"] = idx df.iloc[1, 1] = pd.NaT df.iloc[1, 2] = pd.NaT # indexing result = df.iloc[1] expected = Series( [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT], index=list("ABC"), dtype="object", name=1, ) tm.assert_series_equal(result, expected) result = df.loc[1] expected = Series( [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT], index=list("ABC"), dtype="object", name=1, ) tm.assert_series_equal(result, expected) # indexing - fast_xs df = DataFrame({"a": date_range("2014-01-01", periods=10, tz="UTC")}) result = df.iloc[5] expected = Series( [Timestamp("2014-01-06 00:00:00+0000", tz="UTC")], index=["a"], name=5 ) tm.assert_series_equal(result, expected) result = df.loc[5] tm.assert_series_equal(result, expected) # indexing - boolean result = df[df.a > df.a[3]] expected = df.iloc[4:] tm.assert_frame_equal(result, expected) # indexing - setting an element df = DataFrame( data=pd.to_datetime(["2015-03-30 20:12:32", "2015-03-12 00:11:11"]), columns=["time"], ) df["new_col"] = ["new", "old"] df.time = df.set_index("time").index.tz_localize("UTC") v = df[df.new_col == "new"].set_index("time").index.tz_convert("US/Pacific") # trying to set a single element on a part of a different timezone # this converts to object df2 = df.copy() df2.loc[df2.new_col == "new", "time"] = v expected = Series([v[0], df.loc[1, "time"]], name="time") tm.assert_series_equal(df2.time, expected) v = df.loc[df.new_col == "new", "time"] + pd.Timedelta("1s") df.loc[df.new_col == "new", "time"] = v tm.assert_series_equal(df.loc[df.new_col == "new", "time"], v) def test_consistency_with_tz_aware_scalar(self): # xef gh-12938 # various ways of indexing the same tz-aware scalar df = Series([Timestamp("2016-03-30 14:35:25", tz="Europe/Brussels")]).to_frame() df = pd.concat([df, df]).reset_index(drop=True) expected = Timestamp("2016-03-30 14:35:25+0200", tz="Europe/Brussels") result = df[0][0] assert result == expected result = df.iloc[0, 0] assert result == expected result = df.loc[0, 0] assert result == expected result = df.iat[0, 0] assert result == expected result = df.at[0, 0] assert result == expected result = df[0].loc[0] assert result == expected result = df[0].at[0] assert result == expected def test_indexing_with_datetimeindex_tz(self): # GH 12050 # indexing on a series with a datetimeindex with tz index = date_range("2015-01-01", periods=2, tz="utc") ser = Series(range(2), index=index, dtype="int64") # list-like indexing for sel in (index, list(index)): # getitem result = ser[sel] expected = ser.copy() if sel is not index: expected.index = expected.index._with_freq(None) tm.assert_series_equal(result, expected) # setitem result = ser.copy() result[sel] = 1 expected = Series(1, index=index) tm.assert_series_equal(result, expected) # .loc getitem result = ser.loc[sel] expected = ser.copy() if sel is not index: expected.index = expected.index._with_freq(None) tm.assert_series_equal(result, expected) # .loc setitem result = ser.copy() result.loc[sel] = 1 expected = Series(1, index=index) tm.assert_series_equal(result, expected) # single element indexing # getitem assert ser[index[1]] == 1 # setitem result = ser.copy() result[index[1]] = 5 expected = Series([0, 5], index=index) tm.assert_series_equal(result, expected) # .loc getitem assert ser.loc[index[1]] == 1 # .loc setitem result = ser.copy() result.loc[index[1]] = 5 expected = Series([0, 5], index=index) tm.assert_series_equal(result, expected) def test_partial_setting_with_datetimelike_dtype(self): # GH9478 # a datetimeindex alignment issue with partial setting df = DataFrame( np.arange(6.0).reshape(3, 2), columns=list("AB"), index=date_range("1/1/2000", periods=3, freq="1H"), ) expected = df.copy() expected["C"] = [expected.index[0]] + [pd.NaT, pd.NaT] mask = df.A < 1 df.loc[mask, "C"] = df.loc[mask].index tm.assert_frame_equal(df, expected) def test_loc_setitem_datetime(self): # GH 9516 dt1 = Timestamp("20130101 09:00:00") dt2 = Timestamp("20130101 10:00:00") for conv in [ lambda x: x, lambda x: x.to_datetime64(), lambda x: x.to_pydatetime(), lambda x: np.datetime64(x), ]: df = DataFrame() df.loc[conv(dt1), "one"] = 100 df.loc[conv(dt2), "one"] = 200 expected = DataFrame({"one": [100.0, 200.0]}, index=[dt1, dt2]) tm.assert_frame_equal(df, expected) def test_series_partial_set_datetime(self): # GH 11497 idx = date_range("2011-01-01", "2011-01-02", freq="D", name="idx") ser = Series([0.1, 0.2], index=idx, name="s") result = ser.loc[[Timestamp("2011-01-01"), Timestamp("2011-01-02")]] exp = Series([0.1, 0.2], index=idx, name="s") exp.index = exp.index._with_freq(None) tm.assert_series_equal(result, exp, check_index_type=True) keys = [ Timestamp("2011-01-02"), Timestamp("2011-01-02"), Timestamp("2011-01-01"), ] exp = Series( [0.2, 0.2, 0.1], index=pd.DatetimeIndex(keys, name="idx"), name="s" ) tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) keys = [ Timestamp("2011-01-03"), Timestamp("2011-01-02"), Timestamp("2011-01-03"), ] with pytest.raises(KeyError, match="with any missing labels"): ser.loc[keys] def test_series_partial_set_period(self): # GH 11497 idx = pd.period_range("2011-01-01", "2011-01-02", freq="D", name="idx") ser = Series([0.1, 0.2], index=idx, name="s") result = ser.loc[ [pd.Period("2011-01-01", freq="D"), pd.Period("2011-01-02", freq="D")] ] exp = Series([0.1, 0.2], index=idx, name="s") tm.assert_series_equal(result, exp, check_index_type=True) keys = [ pd.Period("2011-01-02", freq="D"), pd.Period("2011-01-02", freq="D"), pd.Period("2011-01-01", freq="D"), ] exp = Series([0.2, 0.2, 0.1], index=pd.PeriodIndex(keys, name="idx"), name="s") tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) keys = [ pd.Period("2011-01-03", freq="D"), pd.Period("2011-01-02", freq="D"), pd.Period("2011-01-03", freq="D"), ] with pytest.raises(KeyError, match="with any missing labels"): ser.loc[keys] def test_nanosecond_getitem_setitem_with_tz(self): # GH 11679 data = ["2016-06-28 08:30:00.123456789"] index = pd.DatetimeIndex(data, dtype="datetime64[ns, America/Chicago]") df = DataFrame({"a": [10]}, index=index) result = df.loc[df.index[0]] expected = Series(10, index=["a"], name=df.index[0]) tm.assert_series_equal(result, expected) result = df.copy() result.loc[df.index[0], "a"] = -1 expected = DataFrame(-1, index=index, columns=["a"]) tm.assert_frame_equal(result, expected) def test_loc_getitem_across_dst(self): # GH 21846 idx = pd.date_range( "2017-10-29 01:30:00", tz="Europe/Berlin", periods=5, freq="30 min" ) series2 = pd.Series([0, 1, 2, 3, 4], index=idx) t_1 = pd.Timestamp( "2017-10-29 02:30:00+02:00", tz="Europe/Berlin", freq="30min" ) t_2 = pd.Timestamp( "2017-10-29 02:00:00+01:00", tz="Europe/Berlin", freq="30min" ) result = series2.loc[t_1:t_2] expected = pd.Series([2, 3], index=idx[2:4]) tm.assert_series_equal(result, expected) result = series2[t_1] expected = 2 assert result == expected def test_loc_incremental_setitem_with_dst(self): # GH 20724 base = datetime(2015, 11, 1, tzinfo=tz.gettz("US/Pacific")) idxs = [base + timedelta(seconds=i * 900) for i in range(16)] result = pd.Series([0], index=[idxs[0]]) for ts in idxs: result.loc[ts] = 1 expected = pd.Series(1, index=idxs) tm.assert_series_equal(result, expected) def test_loc_setitem_with_existing_dst(self): # GH 18308 start = pd.Timestamp("2017-10-29 00:00:00+0200", tz="Europe/Madrid") end = pd.Timestamp("2017-10-29 03:00:00+0100", tz="Europe/Madrid") ts = pd.Timestamp("2016-10-10 03:00:00", tz="Europe/Madrid") idx = pd.date_range(start, end, closed="left", freq="H") result = pd.DataFrame(index=idx, columns=["value"]) result.loc[ts, "value"] = 12 expected = pd.DataFrame( [np.nan] * len(idx) + [12], index=idx.append(pd.DatetimeIndex([ts])), columns=["value"], dtype=object, ) tm.assert_frame_equal(result, expected) def test_loc_str_slicing(self): ix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") ser = ix.to_series() result = ser.loc[:"2017-12"] expected = ser.iloc[:-1] tm.assert_series_equal(result, expected) def test_loc_label_slicing(self): ix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") ser = ix.to_series() result = ser.loc[: ix[-2]] expected = ser.iloc[:-1] tm.assert_series_equal(result, expected) @pytest.mark.parametrize( "slice_, positions", [ [slice(date(2018, 1, 1), None), [0, 1, 2]], [slice(date(2019, 1, 2), None), [2]], [slice(date(2020, 1, 1), None), []], [slice(None, date(2020, 1, 1)), [0, 1, 2]], [slice(None, date(2019, 1, 1)), [0]], ], ) def test_getitem_slice_date(self, slice_, positions): # https://github.com/pandas-dev/pandas/issues/31501 s = pd.Series( [0, 1, 2], pd.DatetimeIndex(["2019-01-01", "2019-01-01T06:00:00", "2019-01-02"]), ) result = s[slice_] expected = s.take(positions) tm.assert_series_equal(result, expected)