from datetime import datetime, timedelta import numpy as np import pytest from pandas.errors import UnsupportedFunctionCall from pandas import ( DataFrame, DatetimeIndex, MultiIndex, Series, Timedelta, Timestamp, date_range, period_range, to_datetime, to_timedelta, ) import pandas._testing as tm from pandas.api.indexers import BaseIndexer from pandas.core.window import Rolling def test_doc_string(): df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) df df.rolling(2).sum() df.rolling(2, min_periods=1).sum() def test_constructor(frame_or_series): # GH 12669 c = frame_or_series(range(5)).rolling # valid c(0) c(window=2) c(window=2, min_periods=1) c(window=2, min_periods=1, center=True) c(window=2, min_periods=1, center=False) # GH 13383 msg = "window must be non-negative" with pytest.raises(ValueError, match=msg): c(-1) @pytest.mark.parametrize("w", [2.0, "foo", np.array([2])]) def test_invalid_constructor(frame_or_series, w): # not valid c = frame_or_series(range(5)).rolling msg = ( "window must be an integer|" "passed window foo is not compatible with a datetimelike index" ) with pytest.raises(ValueError, match=msg): c(window=w) msg = "min_periods must be an integer" with pytest.raises(ValueError, match=msg): c(window=2, min_periods=w) msg = "center must be a boolean" with pytest.raises(ValueError, match=msg): c(window=2, min_periods=1, center=w) @pytest.mark.parametrize("window", [timedelta(days=3), Timedelta(days=3)]) def test_constructor_with_timedelta_window(window): # GH 15440 n = 10 df = DataFrame( {"value": np.arange(n)}, index=date_range("2015-12-24", periods=n, freq="D") ) expected_data = np.append([0.0, 1.0], np.arange(3.0, 27.0, 3)) result = df.rolling(window=window).sum() expected = DataFrame( {"value": expected_data}, index=date_range("2015-12-24", periods=n, freq="D"), ) tm.assert_frame_equal(result, expected) expected = df.rolling("3D").sum() tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("window", [timedelta(days=3), Timedelta(days=3), "3D"]) def test_constructor_timedelta_window_and_minperiods(window, raw): # GH 15305 n = 10 df = DataFrame( {"value": np.arange(n)}, index=date_range("2017-08-08", periods=n, freq="D") ) expected = DataFrame( {"value": np.append([np.NaN, 1.0], np.arange(3.0, 27.0, 3))}, index=date_range("2017-08-08", periods=n, freq="D"), ) result_roll_sum = df.rolling(window=window, min_periods=2).sum() result_roll_generic = df.rolling(window=window, min_periods=2).apply(sum, raw=raw) tm.assert_frame_equal(result_roll_sum, expected) tm.assert_frame_equal(result_roll_generic, expected) @pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) def test_numpy_compat(method): # see gh-12811 r = Rolling(Series([2, 4, 6]), window=2) msg = "numpy operations are not valid with window objects" with pytest.raises(UnsupportedFunctionCall, match=msg): getattr(r, method)(1, 2, 3) with pytest.raises(UnsupportedFunctionCall, match=msg): getattr(r, method)(dtype=np.float64) def test_closed_fixed(closed, arithmetic_win_operators): # GH 34315 func_name = arithmetic_win_operators df_fixed = DataFrame({"A": [0, 1, 2, 3, 4]}) df_time = DataFrame({"A": [0, 1, 2, 3, 4]}, index=date_range("2020", periods=5)) result = getattr(df_fixed.rolling(2, closed=closed, min_periods=1), func_name)() expected = getattr(df_time.rolling("2D", closed=closed), func_name)().reset_index( drop=True ) tm.assert_frame_equal(result, expected) def test_closed_fixed_binary_col(): # GH 34315 data = [0, 1, 1, 0, 0, 1, 0, 1] df = DataFrame( {"binary_col": data}, index=date_range(start="2020-01-01", freq="min", periods=len(data)), ) rolling = df.rolling(window=len(df), closed="left", min_periods=1) result = rolling.mean() expected = DataFrame( [np.nan, 0, 0.5, 2 / 3, 0.5, 0.4, 0.5, 0.428571], columns=["binary_col"], index=date_range(start="2020-01-01", freq="min", periods=len(data)), ) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("closed", ["neither", "left"]) def test_closed_empty(closed, arithmetic_win_operators): # GH 26005 func_name = arithmetic_win_operators ser = Series(data=np.arange(5), index=date_range("2000", periods=5, freq="2D")) roll = ser.rolling("1D", closed=closed) result = getattr(roll, func_name)() expected = Series([np.nan] * 5, index=ser.index) tm.assert_series_equal(result, expected) @pytest.mark.parametrize("func", ["min", "max"]) def test_closed_one_entry(func): # GH24718 ser = Series(data=[2], index=date_range("2000", periods=1)) result = getattr(ser.rolling("10D", closed="left"), func)() tm.assert_series_equal(result, Series([np.nan], index=ser.index)) @pytest.mark.parametrize("func", ["min", "max"]) def test_closed_one_entry_groupby(func): # GH24718 ser = DataFrame( data={"A": [1, 1, 2], "B": [3, 2, 1]}, index=date_range("2000", periods=3) ) result = getattr( ser.groupby("A", sort=False)["B"].rolling("10D", closed="left"), func )() exp_idx = MultiIndex.from_arrays(arrays=[[1, 1, 2], ser.index], names=("A", None)) expected = Series(data=[np.nan, 3, np.nan], index=exp_idx, name="B") tm.assert_series_equal(result, expected) @pytest.mark.parametrize("input_dtype", ["int", "float"]) @pytest.mark.parametrize( "func,closed,expected", [ ("min", "right", [0.0, 0, 0, 1, 2, 3, 4, 5, 6, 7]), ("min", "both", [0.0, 0, 0, 0, 1, 2, 3, 4, 5, 6]), ("min", "neither", [np.nan, 0, 0, 1, 2, 3, 4, 5, 6, 7]), ("min", "left", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, 6]), ("max", "right", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ("max", "both", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ("max", "neither", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), ("max", "left", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), ], ) def test_closed_min_max_datetime(input_dtype, func, closed, expected): # see gh-21704 ser = Series( data=np.arange(10).astype(input_dtype), index=date_range("2000", periods=10) ) result = getattr(ser.rolling("3D", closed=closed), func)() expected = Series(expected, index=ser.index) tm.assert_series_equal(result, expected) def test_closed_uneven(): # see gh-21704 ser = Series(data=np.arange(10), index=date_range("2000", periods=10)) # uneven ser = ser.drop(index=ser.index[[1, 5]]) result = ser.rolling("3D", closed="left").min() expected = Series([np.nan, 0, 0, 2, 3, 4, 6, 6], index=ser.index) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( "func,closed,expected", [ ("min", "right", [np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), ("min", "both", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, np.nan]), ("min", "neither", [np.nan, np.nan, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), ("min", "left", [np.nan, np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan]), ("max", "right", [np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan, np.nan]), ("max", "both", [np.nan, 1, 2, 3, 4, 5, 6, 6, 6, np.nan]), ("max", "neither", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, np.nan, np.nan]), ("max", "left", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan]), ], ) def test_closed_min_max_minp(func, closed, expected): # see gh-21704 ser = Series(data=np.arange(10), index=date_range("2000", periods=10)) ser[ser.index[-3:]] = np.nan result = getattr(ser.rolling("3D", min_periods=2, closed=closed), func)() expected = Series(expected, index=ser.index) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( "closed,expected", [ ("right", [0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8]), ("both", [0, 0.5, 1, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), ("neither", [np.nan, 0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), ("left", [np.nan, 0, 0.5, 1, 2, 3, 4, 5, 6, 7]), ], ) def test_closed_median_quantile(closed, expected): # GH 26005 ser = Series(data=np.arange(10), index=date_range("2000", periods=10)) roll = ser.rolling("3D", closed=closed) expected = Series(expected, index=ser.index) result = roll.median() tm.assert_series_equal(result, expected) result = roll.quantile(0.5) tm.assert_series_equal(result, expected) @pytest.mark.parametrize("roller", ["1s", 1]) def tests_empty_df_rolling(roller): # GH 15819 Verifies that datetime and integer rolling windows can be # applied to empty DataFrames expected = DataFrame() result = DataFrame().rolling(roller).sum() tm.assert_frame_equal(result, expected) # Verifies that datetime and integer rolling windows can be applied to # empty DataFrames with datetime index expected = DataFrame(index=DatetimeIndex([])) result = DataFrame(index=DatetimeIndex([])).rolling(roller).sum() tm.assert_frame_equal(result, expected) def test_empty_window_median_quantile(): # GH 26005 expected = Series([np.nan, np.nan, np.nan]) roll = Series(np.arange(3)).rolling(0) result = roll.median() tm.assert_series_equal(result, expected) result = roll.quantile(0.1) tm.assert_series_equal(result, expected) def test_missing_minp_zero(): # https://github.com/pandas-dev/pandas/pull/18921 # minp=0 x = Series([np.nan]) result = x.rolling(1, min_periods=0).sum() expected = Series([0.0]) tm.assert_series_equal(result, expected) # minp=1 result = x.rolling(1, min_periods=1).sum() expected = Series([np.nan]) tm.assert_series_equal(result, expected) def test_missing_minp_zero_variable(): # https://github.com/pandas-dev/pandas/pull/18921 x = Series( [np.nan] * 4, index=DatetimeIndex(["2017-01-01", "2017-01-04", "2017-01-06", "2017-01-07"]), ) result = x.rolling(Timedelta("2d"), min_periods=0).sum() expected = Series(0.0, index=x.index) tm.assert_series_equal(result, expected) def test_multi_index_names(): # GH 16789, 16825 cols = MultiIndex.from_product([["A", "B"], ["C", "D", "E"]], names=["1", "2"]) df = DataFrame(np.ones((10, 6)), columns=cols) result = df.rolling(3).cov() tm.assert_index_equal(result.columns, df.columns) assert result.index.names == [None, "1", "2"] def test_rolling_axis_sum(axis_frame): # see gh-23372. df = DataFrame(np.ones((10, 20))) axis = df._get_axis_number(axis_frame) if axis == 0: expected = DataFrame({i: [np.nan] * 2 + [3.0] * 8 for i in range(20)}) else: # axis == 1 expected = DataFrame([[np.nan] * 2 + [3.0] * 18] * 10) result = df.rolling(3, axis=axis_frame).sum() tm.assert_frame_equal(result, expected) def test_rolling_axis_count(axis_frame): # see gh-26055 df = DataFrame({"x": range(3), "y": range(3)}) axis = df._get_axis_number(axis_frame) if axis in [0, "index"]: expected = DataFrame({"x": [1.0, 2.0, 2.0], "y": [1.0, 2.0, 2.0]}) else: expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]}) result = df.rolling(2, axis=axis_frame, min_periods=0).count() tm.assert_frame_equal(result, expected) def test_readonly_array(): # GH-27766 arr = np.array([1, 3, np.nan, 3, 5]) arr.setflags(write=False) result = Series(arr).rolling(2).mean() expected = Series([np.nan, 2, np.nan, np.nan, 4]) tm.assert_series_equal(result, expected) def test_rolling_datetime(axis_frame, tz_naive_fixture): # GH-28192 tz = tz_naive_fixture df = DataFrame( {i: [1] * 2 for i in date_range("2019-8-01", "2019-08-03", freq="D", tz=tz)} ) if axis_frame in [0, "index"]: result = df.T.rolling("2D", axis=axis_frame).sum().T else: result = df.rolling("2D", axis=axis_frame).sum() expected = DataFrame( { **{ i: [1.0] * 2 for i in date_range("2019-8-01", periods=1, freq="D", tz=tz) }, **{ i: [2.0] * 2 for i in date_range("2019-8-02", "2019-8-03", freq="D", tz=tz) }, } ) tm.assert_frame_equal(result, expected) def test_rolling_window_as_string(): # see gh-22590 date_today = datetime.now() days = date_range(date_today, date_today + timedelta(365), freq="D") npr = np.random.RandomState(seed=421) data = npr.randint(1, high=100, size=len(days)) df = DataFrame({"DateCol": days, "metric": data}) df.set_index("DateCol", inplace=True) result = df.rolling(window="21D", min_periods=2, closed="left")["metric"].agg("max") expData = ( [np.nan] * 2 + [88.0] * 16 + [97.0] * 9 + [98.0] + [99.0] * 21 + [95.0] * 16 + [93.0] * 5 + [89.0] * 5 + [96.0] * 21 + [94.0] * 14 + [90.0] * 13 + [88.0] * 2 + [90.0] * 9 + [96.0] * 21 + [95.0] * 6 + [91.0] + [87.0] * 6 + [92.0] * 21 + [83.0] * 2 + [86.0] * 10 + [87.0] * 5 + [98.0] * 21 + [97.0] * 14 + [93.0] * 7 + [87.0] * 4 + [86.0] * 4 + [95.0] * 21 + [85.0] * 14 + [83.0] * 2 + [76.0] * 5 + [81.0] * 2 + [98.0] * 21 + [95.0] * 14 + [91.0] * 7 + [86.0] + [93.0] * 3 + [95.0] * 20 ) expected = Series( expData, index=days.rename("DateCol")._with_freq(None), name="metric" ) tm.assert_series_equal(result, expected) def test_min_periods1(): # GH#6795 df = DataFrame([0, 1, 2, 1, 0], columns=["a"]) result = df["a"].rolling(3, center=True, min_periods=1).max() expected = Series([1.0, 2.0, 2.0, 2.0, 1.0], name="a") tm.assert_series_equal(result, expected) def test_rolling_count_with_min_periods(frame_or_series): # GH 26996 result = frame_or_series(range(5)).rolling(3, min_periods=3).count() expected = frame_or_series([np.nan, np.nan, 3.0, 3.0, 3.0]) tm.assert_equal(result, expected) def test_rolling_count_default_min_periods_with_null_values(frame_or_series): # GH 26996 values = [1, 2, 3, np.nan, 4, 5, 6] expected_counts = [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0] # GH 31302 with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = frame_or_series(values).rolling(3).count() expected = frame_or_series(expected_counts) tm.assert_equal(result, expected) @pytest.mark.parametrize( "df,expected,window,min_periods", [ ( DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), [ ({"A": [1], "B": [4]}, [0]), ({"A": [1, 2], "B": [4, 5]}, [0, 1]), ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), ], 3, None, ), ( DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), [ ({"A": [1], "B": [4]}, [0]), ({"A": [1, 2], "B": [4, 5]}, [0, 1]), ({"A": [2, 3], "B": [5, 6]}, [1, 2]), ], 2, 1, ), ( DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), [ ({"A": [1], "B": [4]}, [0]), ({"A": [1, 2], "B": [4, 5]}, [0, 1]), ({"A": [2, 3], "B": [5, 6]}, [1, 2]), ], 2, 2, ), ( DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), [ ({"A": [1], "B": [4]}, [0]), ({"A": [2], "B": [5]}, [1]), ({"A": [3], "B": [6]}, [2]), ], 1, 1, ), ( DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), [ ({"A": [1], "B": [4]}, [0]), ({"A": [2], "B": [5]}, [1]), ({"A": [3], "B": [6]}, [2]), ], 1, 0, ), (DataFrame({"A": [1], "B": [4]}), [], 2, None), (DataFrame({"A": [1], "B": [4]}), [], 2, 1), (DataFrame(), [({}, [])], 2, None), ( DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), [ ({"A": [1.0], "B": [np.nan]}, [0]), ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), ], 3, 2, ), ], ) def test_iter_rolling_dataframe(df, expected, window, min_periods): # GH 11704 expected = [DataFrame(values, index=index) for (values, index) in expected] for (expected, actual) in zip( expected, df.rolling(window, min_periods=min_periods) ): tm.assert_frame_equal(actual, expected) @pytest.mark.parametrize( "expected,window", [ ( [ ({"A": [1], "B": [4]}, [0]), ({"A": [1, 2], "B": [4, 5]}, [0, 1]), ({"A": [2, 3], "B": [5, 6]}, [1, 2]), ], "2D", ), ( [ ({"A": [1], "B": [4]}, [0]), ({"A": [1, 2], "B": [4, 5]}, [0, 1]), ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), ], "3D", ), ( [ ({"A": [1], "B": [4]}, [0]), ({"A": [2], "B": [5]}, [1]), ({"A": [3], "B": [6]}, [2]), ], "1D", ), ], ) def test_iter_rolling_on_dataframe(expected, window): # GH 11704 df = DataFrame( { "A": [1, 2, 3, 4, 5], "B": [4, 5, 6, 7, 8], "C": date_range(start="2016-01-01", periods=5, freq="D"), } ) expected = [DataFrame(values, index=index) for (values, index) in expected] for (expected, actual) in zip(expected, df.rolling(window, on="C")): tm.assert_frame_equal(actual, expected) @pytest.mark.parametrize( "ser,expected,window, min_periods", [ ( Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3, None, ), ( Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3, 1, ), (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])], 2, 1), (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])], 2, 2), (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1, 0), (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1, 1), (Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2, 0), (Series([], dtype="int64"), [], 2, 1), ], ) def test_iter_rolling_series(ser, expected, window, min_periods): # GH 11704 expected = [Series(values, index=index) for (values, index) in expected] for (expected, actual) in zip( expected, ser.rolling(window, min_periods=min_periods) ): tm.assert_series_equal(actual, expected) @pytest.mark.parametrize( "expected,expected_index,window", [ ( [[0], [1], [2], [3], [4]], [ date_range("2020-01-01", periods=1, freq="D"), date_range("2020-01-02", periods=1, freq="D"), date_range("2020-01-03", periods=1, freq="D"), date_range("2020-01-04", periods=1, freq="D"), date_range("2020-01-05", periods=1, freq="D"), ], "1D", ), ( [[0], [0, 1], [1, 2], [2, 3], [3, 4]], [ date_range("2020-01-01", periods=1, freq="D"), date_range("2020-01-01", periods=2, freq="D"), date_range("2020-01-02", periods=2, freq="D"), date_range("2020-01-03", periods=2, freq="D"), date_range("2020-01-04", periods=2, freq="D"), ], "2D", ), ( [[0], [0, 1], [0, 1, 2], [1, 2, 3], [2, 3, 4]], [ date_range("2020-01-01", periods=1, freq="D"), date_range("2020-01-01", periods=2, freq="D"), date_range("2020-01-01", periods=3, freq="D"), date_range("2020-01-02", periods=3, freq="D"), date_range("2020-01-03", periods=3, freq="D"), ], "3D", ), ], ) def test_iter_rolling_datetime(expected, expected_index, window): # GH 11704 ser = Series(range(5), index=date_range(start="2020-01-01", periods=5, freq="D")) expected = [ Series(values, index=idx) for (values, idx) in zip(expected, expected_index) ] for (expected, actual) in zip(expected, ser.rolling(window)): tm.assert_series_equal(actual, expected) @pytest.mark.parametrize( "grouping,_index", [ ( {"level": 0}, MultiIndex.from_tuples( [(0, 0), (0, 0), (1, 1), (1, 1), (1, 1)], names=[None, None] ), ), ( {"by": "X"}, MultiIndex.from_tuples( [(0, 0), (1, 0), (2, 1), (3, 1), (4, 1)], names=["X", None] ), ), ], ) def test_rolling_positional_argument(grouping, _index, raw): # GH 34605 def scaled_sum(*args): if len(args) < 2: raise ValueError("The function needs two arguments") array, scale = args return array.sum() / scale df = DataFrame(data={"X": range(5)}, index=[0, 0, 1, 1, 1]) expected = DataFrame(data={"X": [0.0, 0.5, 1.0, 1.5, 2.0]}, index=_index) result = df.groupby(**grouping).rolling(1).apply(scaled_sum, raw=raw, args=(2,)) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("add", [0.0, 2.0]) def test_rolling_numerical_accuracy_kahan_mean(add): # GH: 36031 implementing kahan summation df = DataFrame( {"A": [3002399751580331.0 + add, -0.0, -0.0]}, index=[ Timestamp("19700101 09:00:00"), Timestamp("19700101 09:00:03"), Timestamp("19700101 09:00:06"), ], ) result = ( df.resample("1s").ffill().rolling("3s", closed="left", min_periods=3).mean() ) dates = date_range("19700101 09:00:00", periods=7, freq="S") expected = DataFrame( { "A": [ np.nan, np.nan, np.nan, 3002399751580330.5, 2001599834386887.25, 1000799917193443.625, 0.0, ] }, index=dates, ) tm.assert_frame_equal(result, expected) def test_rolling_numerical_accuracy_kahan_sum(): # GH: 13254 df = DataFrame([2.186, -1.647, 0.0, 0.0, 0.0, 0.0], columns=["x"]) result = df["x"].rolling(3).sum() expected = Series([np.nan, np.nan, 0.539, -1.647, 0.0, 0.0], name="x") tm.assert_series_equal(result, expected) def test_rolling_numerical_accuracy_jump(): # GH: 32761 index = date_range(start="2020-01-01", end="2020-01-02", freq="60s").append( DatetimeIndex(["2020-01-03"]) ) data = np.random.rand(len(index)) df = DataFrame({"data": data}, index=index) result = df.rolling("60s").mean() tm.assert_frame_equal(result, df[["data"]]) def test_rolling_numerical_accuracy_small_values(): # GH: 10319 s = Series( data=[0.00012456, 0.0003, -0.0, -0.0], index=date_range("1999-02-03", "1999-02-06"), ) result = s.rolling(1).mean() tm.assert_series_equal(result, s) def test_rolling_numerical_too_large_numbers(): # GH: 11645 dates = date_range("2015-01-01", periods=10, freq="D") ds = Series(data=range(10), index=dates, dtype=np.float64) ds[2] = -9e33 result = ds.rolling(5).mean() expected = Series( [np.nan, np.nan, np.nan, np.nan, -1.8e33, -1.8e33, -1.8e33, 5.0, 6.0, 7.0], index=dates, ) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( ("func", "value"), [("sum", 2.0), ("max", 1.0), ("min", 1.0), ("mean", 1.0), ("median", 1.0)], ) def test_rolling_mixed_dtypes_axis_1(func, value): # GH: 20649 df = DataFrame(1, index=[1, 2], columns=["a", "b", "c"]) df["c"] = 1.0 result = getattr(df.rolling(window=2, min_periods=1, axis=1), func)() expected = DataFrame( {"a": [1.0, 1.0], "b": [value, value], "c": [value, value]}, index=[1, 2] ) tm.assert_frame_equal(result, expected) def test_rolling_axis_one_with_nan(): # GH: 35596 df = DataFrame( [ [0, 1, 2, 4, np.nan, np.nan, np.nan], [0, 1, 2, np.nan, np.nan, np.nan, np.nan], [0, 2, 2, np.nan, 2, np.nan, 1], ] ) result = df.rolling(window=7, min_periods=1, axis="columns").sum() expected = DataFrame( [ [0.0, 1.0, 3.0, 7.0, 7.0, 7.0, 7.0], [0.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0], [0.0, 2.0, 4.0, 4.0, 6.0, 6.0, 7.0], ] ) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( "value", ["test", to_datetime("2019-12-31"), to_timedelta("1 days 06:05:01.00003")], ) def test_rolling_axis_1_non_numeric_dtypes(value): # GH: 20649 df = DataFrame({"a": [1, 2]}) df["b"] = value result = df.rolling(window=2, min_periods=1, axis=1).sum() expected = DataFrame({"a": [1.0, 2.0]}) tm.assert_frame_equal(result, expected) def test_rolling_on_df_transposed(): # GH: 32724 df = DataFrame({"A": [1, None], "B": [4, 5], "C": [7, 8]}) expected = DataFrame({"A": [1.0, np.nan], "B": [5.0, 5.0], "C": [11.0, 13.0]}) result = df.rolling(min_periods=1, window=2, axis=1).sum() tm.assert_frame_equal(result, expected) result = df.T.rolling(min_periods=1, window=2).sum().T tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( ("index", "window"), [ ( period_range(start="2020-01-01 08:00", end="2020-01-01 08:08", freq="T"), "2T", ), ( period_range(start="2020-01-01 08:00", end="2020-01-01 12:00", freq="30T"), "1h", ), ], ) @pytest.mark.parametrize( ("func", "values"), [ ("min", [np.nan, 0, 0, 1, 2, 3, 4, 5, 6]), ("max", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7]), ("sum", [np.nan, 0, 1, 3, 5, 7, 9, 11, 13]), ], ) def test_rolling_period_index(index, window, func, values): # GH: 34225 ds = Series([0, 1, 2, 3, 4, 5, 6, 7, 8], index=index) result = getattr(ds.rolling(window, closed="left"), func)() expected = Series(values, index=index) tm.assert_series_equal(result, expected) def test_rolling_sem(frame_or_series): # GH: 26476 obj = frame_or_series([0, 1, 2]) result = obj.rolling(2, min_periods=1).sem() if isinstance(result, DataFrame): result = Series(result[0].values) expected = Series([np.nan] + [0.707107] * 2) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( ("func", "third_value", "values"), [ ("var", 1, [5e33, 0, 0.5, 0.5, 2, 0]), ("std", 1, [7.071068e16, 0, 0.7071068, 0.7071068, 1.414214, 0]), ("var", 2, [5e33, 0.5, 0, 0.5, 2, 0]), ("std", 2, [7.071068e16, 0.7071068, 0, 0.7071068, 1.414214, 0]), ], ) def test_rolling_var_numerical_issues(func, third_value, values): # GH: 37051 ds = Series([99999999999999999, 1, third_value, 2, 3, 1, 1]) result = getattr(ds.rolling(2), func)() expected = Series([np.nan] + values) tm.assert_series_equal(result, expected) def test_timeoffset_as_window_parameter_for_corr(): # GH: 28266 exp = DataFrame( { "B": [ np.nan, np.nan, 0.9999999999999998, -1.0, 1.0, -0.3273268353539892, 0.9999999999999998, 1.0, 0.9999999999999998, 1.0, ], "A": [ np.nan, np.nan, -1.0, 1.0000000000000002, -0.3273268353539892, 0.9999999999999966, 1.0, 1.0000000000000002, 1.0, 1.0000000000000002, ], }, index=MultiIndex.from_tuples( [ (Timestamp("20130101 09:00:00"), "B"), (Timestamp("20130101 09:00:00"), "A"), (Timestamp("20130102 09:00:02"), "B"), (Timestamp("20130102 09:00:02"), "A"), (Timestamp("20130103 09:00:03"), "B"), (Timestamp("20130103 09:00:03"), "A"), (Timestamp("20130105 09:00:05"), "B"), (Timestamp("20130105 09:00:05"), "A"), (Timestamp("20130106 09:00:06"), "B"), (Timestamp("20130106 09:00:06"), "A"), ] ), ) df = DataFrame( {"B": [0, 1, 2, 4, 3], "A": [7, 4, 6, 9, 3]}, index=[ Timestamp("20130101 09:00:00"), Timestamp("20130102 09:00:02"), Timestamp("20130103 09:00:03"), Timestamp("20130105 09:00:05"), Timestamp("20130106 09:00:06"), ], ) res = df.rolling(window="3d").corr() tm.assert_frame_equal(exp, res) @pytest.mark.parametrize("method", ["var", "sum", "mean", "skew", "kurt", "min", "max"]) def test_rolling_decreasing_indices(method): """ Make sure that decreasing indices give the same results as increasing indices. GH 36933 """ df = DataFrame({"values": np.arange(-15, 10) ** 2}) df_reverse = DataFrame({"values": df["values"][::-1]}, index=df.index[::-1]) increasing = getattr(df.rolling(window=5), method)() decreasing = getattr(df_reverse.rolling(window=5), method)() assert np.abs(decreasing.values[::-1][:-4] - increasing.values[4:]).max() < 1e-12 @pytest.mark.parametrize( "method,expected", [ ( "var", [ float("nan"), 43.0, float("nan"), 136.333333, 43.5, 94.966667, 182.0, 318.0, ], ), ("mean", [float("nan"), 7.5, float("nan"), 21.5, 6.0, 9.166667, 13.0, 17.5]), ("sum", [float("nan"), 30.0, float("nan"), 86.0, 30.0, 55.0, 91.0, 140.0]), ( "skew", [ float("nan"), 0.709296, float("nan"), 0.407073, 0.984656, 0.919184, 0.874674, 0.842418, ], ), ( "kurt", [ float("nan"), -0.5916711736073559, float("nan"), -1.0028993131317954, -0.06103844629409494, -0.254143227116194, -0.37362637362637585, -0.45439658241367054, ], ), ], ) def test_rolling_non_monotonic(method, expected): """ Make sure the (rare) branch of non-monotonic indices is covered by a test. output from 1.1.3 is assumed to be the expected output. Output of sum/mean has manually been verified. GH 36933. """ # Based on an example found in computation.rst use_expanding = [True, False, True, False, True, True, True, True] df = DataFrame({"values": np.arange(len(use_expanding)) ** 2}) class CustomIndexer(BaseIndexer): def get_window_bounds(self, num_values, min_periods, center, closed): start = np.empty(num_values, dtype=np.int64) end = np.empty(num_values, dtype=np.int64) for i in range(num_values): if self.use_expanding[i]: start[i] = 0 end[i] = i + 1 else: start[i] = i end[i] = i + self.window_size return start, end indexer = CustomIndexer(window_size=4, use_expanding=use_expanding) result = getattr(df.rolling(indexer), method)() expected = DataFrame({"values": expected}) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( ("index", "window"), [([0, 1, 2, 3, 4], 2), (date_range("2001-01-01", freq="D", periods=5), "2D")], ) def test_rolling_corr_timedelta_index(index, window): # GH: 31286 x = Series([1, 2, 3, 4, 5], index=index) y = x.copy() x[0:2] = 0.0 result = x.rolling(window).corr(y) expected = Series([np.nan, np.nan, 1, 1, 1], index=index) tm.assert_almost_equal(result, expected) def test_groupby_rolling_nan_included(): # GH 35542 data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]} df = DataFrame(data) result = df.groupby("group", dropna=False).rolling(1, min_periods=1).mean() expected = DataFrame( {"B": [0.0, 2.0, 3.0, 1.0, 4.0]}, # GH-38057 from_tuples puts the NaNs in the codes, result expects them # to be in the levels, at the moment # index=MultiIndex.from_tuples( # [("g1", 0), ("g1", 2), ("g2", 3), (np.nan, 1), (np.nan, 4)], # names=["group", None], # ), index=MultiIndex( [["g1", "g2", np.nan], [0, 1, 2, 3, 4]], [[0, 0, 1, 2, 2], [0, 2, 3, 1, 4]], names=["group", None], ), ) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("method", ["skew", "kurt"]) def test_rolling_skew_kurt_numerical_stability(method): # GH: 6929 s = Series(np.random.rand(10)) expected = getattr(s.rolling(3), method)() s = s + 50000 result = getattr(s.rolling(3), method)() tm.assert_series_equal(result, expected) @pytest.mark.parametrize( ("method", "values"), [ ("skew", [2.0, 0.854563, 0.0, 1.999984]), ("kurt", [4.0, -1.289256, -1.2, 3.999946]), ], ) def test_rolling_skew_kurt_large_value_range(method, values): # GH: 37557 s = Series([3000000, 1, 1, 2, 3, 4, 999]) result = getattr(s.rolling(4), method)() expected = Series([np.nan] * 3 + values) tm.assert_series_equal(result, expected)