У меня есть MRE с примером данных и ожидаемыми выходными столбцами; он довольно длинный, чтобы полностью продемонстрировать то, что мне нужно:
Код: Выделить всё
import numpy as np
import pandas as pd
# Sample data for the example: 50 observations, each with a 'Group' label, a
# 'Date' (given as a string here), an integer 'Weekday' code and two numeric
# measurements ('Data 1', 'Data 2').
# NOTE(review): 'Weekday' looks like Monday=0 numbering (as Series.dt.weekday
# would give) - confirm against the dates.
# Not every group has a row for every date, so some look-back dates are missing.
df = pd.DataFrame({
'Group': [
'Group 1', 'Group 1', 'Group 1', 'Group 1', 'Group 2', 'Group 1', 'Group 2', 'Group 2',
'Group 1', 'Group 1', 'Group 2', 'Group 1', 'Group 2', 'Group 1', 'Group 2', 'Group 1',
'Group 2', 'Group 1', 'Group 2', 'Group 2', 'Group 2', 'Group 1', 'Group 2', 'Group 1',
'Group 2', 'Group 1', 'Group 2', 'Group 1', 'Group 2', 'Group 2', 'Group 1', 'Group 2',
'Group 1', 'Group 2', 'Group 1', 'Group 2', 'Group 1', 'Group 2', 'Group 1', 'Group 2',
'Group 1', 'Group 2', 'Group 1', 'Group 2', 'Group 1', 'Group 2', 'Group 1', 'Group 2',
'Group 1', 'Group 2'],
'Date': [
'2025-06-23 00:00:00', '2025-06-24 00:00:00', '2025-06-25 00:00:00', '2025-06-26 00:00:00',
'2025-06-26 00:00:00', '2025-06-27 00:00:00', '2025-06-27 00:00:00', '2025-06-28 00:00:00',
'2025-06-29 00:00:00', '2025-06-30 00:00:00', '2025-06-30 00:00:00', '2025-07-01 00:00:00',
'2025-07-01 00:00:00', '2025-07-02 00:00:00', '2025-07-02 00:00:00', '2025-07-03 00:00:00',
'2025-07-03 00:00:00', '2025-07-04 00:00:00', '2025-07-04 00:00:00', '2025-07-05 00:00:00',
'2025-07-07 00:00:00', '2025-07-08 00:00:00', '2025-07-08 00:00:00', '2025-07-09 00:00:00',
'2025-07-09 00:00:00', '2025-07-10 00:00:00', '2025-07-10 00:00:00', '2025-07-11 00:00:00',
'2025-07-11 00:00:00', '2025-07-12 00:00:00', '2025-07-14 00:00:00', '2025-07-14 00:00:00',
'2025-07-15 00:00:00', '2025-07-15 00:00:00', '2025-07-16 00:00:00', '2025-07-16 00:00:00',
'2025-07-17 00:00:00', '2025-07-17 00:00:00', '2025-07-18 00:00:00', '2025-07-19 00:00:00',
'2025-07-21 00:00:00', '2025-07-21 00:00:00', '2025-07-22 00:00:00', '2025-07-22 00:00:00',
'2025-07-23 00:00:00', '2025-07-23 00:00:00', '2025-07-24 00:00:00', '2025-07-24 00:00:00',
'2025-07-25 00:00:00', '2025-07-25 00:00:00'],
'Weekday': [
0, 1, 2, 3, 3, 4, 4, 5, 6, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5,
0, 0, 1, 1, 2, 2, 3, 3, 4, 5, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4],
'Data 1': [
45, 27, 58, 69, 33, 42, 38, 7, 1, 60, 37, 45, 31, 66, 30, 61, 29, 36, 41, 9, 44, 29, 34,
46, 36, 55, 34, 29, 40, 8, 62, 49, 26, 30, 51, 31, 57, 36, 40, 11, 65, 37, 50, 34, 38, 35,
70, 25, 27, 42],
'Data 2': [
7, 4, 5, 7, 8, 4, 9, 3, 0, 6, 5, 3, 3, 11, 6, 10, 1, 6, 4, 0, 6, 6, 5, 4, 2, 7, 1, 4, 5,
1, 3, 4, 0, 6, 4, 1, 7, 1, 8, 0, 4, 9, 4, 4, 3, 2, 1, 5, 4, 7]})
Код: Выделить всё
# Attempted approach: split the rows by 'Weekday' and, within each weekday,
# average 'Data 1' / 'Data 2' over the three rows preceding the current one.
# shift(1) keeps the current row out of its own window; min_periods=3 leaves
# NaN until three earlier rows exist.
# NOTE(review): the window counts rows, not calendar weeks, so a missing week
# is silently replaced by an older row; 'Group' is not part of the grouping
# key, so rows of different groups share one window.
# The expression's result is not assigned to anything.
(df.groupby(['Weekday'], as_index=False)[['Data 1', 'Data 2']]
.apply(lambda x: x.shift(1).rolling(window=3, min_periods=3).mean())
.reset_index(level=0, drop=True))
Код: Выделить всё
# For every row, average 'Data 1' and 'Data 2' over the rows of the same Group
# dated exactly 7, 14 and 21 days earlier. Look-back dates with no row are
# skipped, so the mean may cover fewer than three weeks; with no match at all
# the result is NaN.
#
# Adds three columns to df:
#   'L3W'            - list of the three look-back Timestamps for the row
#   'Data 1 L3W Av'  - mean of 'Data 1' over the matching rows
#   'Data 2 L3W Av'  - mean of 'Data 2' over the matching rows
#
# The matching is done with one merge plus a grouped mean instead of building
# a boolean mask over the whole frame for each row.
df['Date'] = pd.to_datetime(df['Date'])

# Look-back offsets: one, two and three whole weeks.
l3w_offsets = [pd.Timedelta(days=7 * i) for i in range(1, 4)]

# Iterating the Series (rather than .values) yields Timestamps directly.
l3w_dates = [
    [date - offset for offset in l3w_offsets]
    for date in df['Date']]
df['L3W'] = l3w_dates

value_cols = ['Data 1', 'Data 2']

# One key row per (source row, look-back date). '_row' is the source row's
# position, so results line up again whatever df's index looks like.
keys = df[['Group', 'Date']].reset_index(drop=True)
keys['_row'] = range(len(keys))
lookbacks = pd.concat(
    [keys.assign(Date=keys['Date'] - offset) for offset in l3w_offsets],
    ignore_index=True)
# merge() pairs missing keys with each other; drop them so a row without a
# Group gets NaN (as an equality comparison on Group would give).
lookbacks = lookbacks.dropna(subset=['Group'])

# Inner merge keeps only the look-back dates that actually exist for the
# group; duplicate (Group, Date) rows all contribute to the mean.
matched = lookbacks.merge(
    df[['Group', 'Date'] + value_cols], on=['Group', 'Date'])

# Rows with no match are absent after the groupby; reindex restores them as NaN.
means = (
    matched.groupby('_row')[value_cols]
    .mean()
    .reindex(pd.RangeIndex(len(df))))

# Add to the columns (positional assignment, independent of df's index)
for col in value_cols:
    df[col + ' L3W Av'] = means[col].to_numpy()
Код: Выделить всё
Group Date Weekday Data 1 Data 2 L3W Data 1 L3W Av Data 2 L3W Av
0 Group 1 2025-06-23 0 45 7 [Timestamp('2025-06-16 00:00:00'), Timestamp('2025-06-09 00:00:00'), Timestamp('2025-06-02 00:00:00')]
1 Group 1 2025-06-24 1 27 4 [Timestamp('2025-06-17 00:00:00'), Timestamp('2025-06-10 00:00:00'), Timestamp('2025-06-03 00:00:00')]
2 Group 1 2025-06-25 2 58 5 [Timestamp('2025-06-18 00:00:00'), Timestamp('2025-06-11 00:00:00'), Timestamp('2025-06-04 00:00:00')]
3 Group 1 2025-06-26 3 69 7 [Timestamp('2025-06-19 00:00:00'), Timestamp('2025-06-12 00:00:00'), Timestamp('2025-06-05 00:00:00')]
4 Group 2 2025-06-26 3 33 8 [Timestamp('2025-06-19 00:00:00'), Timestamp('2025-06-12 00:00:00'), Timestamp('2025-06-05 00:00:00')]
5 Group 1 2025-06-27 4 42 4 [Timestamp('2025-06-20 00:00:00'), Timestamp('2025-06-13 00:00:00'), Timestamp('2025-06-06 00:00:00')]
6 Group 2 2025-06-27 4 38 9 [Timestamp('2025-06-20 00:00:00'), Timestamp('2025-06-13 00:00:00'), Timestamp('2025-06-06 00:00:00')]
7 Group 2 2025-06-28 5 7 3 [Timestamp('2025-06-21 00:00:00'), Timestamp('2025-06-14 00:00:00'), Timestamp('2025-06-07 00:00:00')]
8 Group 1 2025-06-29 6 1 0 [Timestamp('2025-06-22 00:00:00'), Timestamp('2025-06-15 00:00:00'), Timestamp('2025-06-08 00:00:00')]
9 Group 1 2025-06-30 0 60 6 [Timestamp('2025-06-23 00:00:00'), Timestamp('2025-06-16 00:00:00'), Timestamp('2025-06-09 00:00:00')] 45.0 7.0
10 Group 2 2025-06-30 0 37 5 [Timestamp('2025-06-23 00:00:00'), Timestamp('2025-06-16 00:00:00'), Timestamp('2025-06-09 00:00:00')]
11 Group 1 2025-07-01 1 45 3 [Timestamp('2025-06-24 00:00:00'), Timestamp('2025-06-17 00:00:00'), Timestamp('2025-06-10 00:00:00')] 27.0 4.0
12 Group 2 2025-07-01 1 31 3 [Timestamp('2025-06-24 00:00:00'), Timestamp('2025-06-17 00:00:00'), Timestamp('2025-06-10 00:00:00')]
13 Group 1 2025-07-02 2 66 11 [Timestamp('2025-06-25 00:00:00'), Timestamp('2025-06-18 00:00:00'), Timestamp('2025-06-11 00:00:00')] 58.0 5.0
14 Group 2 2025-07-02 2 30 6 [Timestamp('2025-06-25 00:00:00'), Timestamp('2025-06-18 00:00:00'), Timestamp('2025-06-11 00:00:00')]
15 Group 1 2025-07-03 3 61 10 [Timestamp('2025-06-26 00:00:00'), Timestamp('2025-06-19 00:00:00'), Timestamp('2025-06-12 00:00:00')] 69.0 7.0
16 Group 2 2025-07-03 3 29 1 [Timestamp('2025-06-26 00:00:00'), Timestamp('2025-06-19 00:00:00'), Timestamp('2025-06-12 00:00:00')] 33.0 8.0
17 Group 1 2025-07-04 4 36 6 [Timestamp('2025-06-27 00:00:00'), Timestamp('2025-06-20 00:00:00'), Timestamp('2025-06-13 00:00:00')] 42.0 4.0
18 Group 2 2025-07-04 4 41 4 [Timestamp('2025-06-27 00:00:00'), Timestamp('2025-06-20 00:00:00'), Timestamp('2025-06-13 00:00:00')] 38.0 9.0
19 Group 2 2025-07-05 5 9 0 [Timestamp('2025-06-28 00:00:00'), Timestamp('2025-06-21 00:00:00'), Timestamp('2025-06-14 00:00:00')] 7.0 3.0
20 Group 2 2025-07-07 0 44 6 [Timestamp('2025-06-30 00:00:00'), Timestamp('2025-06-23 00:00:00'), Timestamp('2025-06-16 00:00:00')] 37.0 5.0
21 Group 1 2025-07-08 1 29 6 [Timestamp('2025-07-01 00:00:00'), Timestamp('2025-06-24 00:00:00'), Timestamp('2025-06-17 00:00:00')] 36.0 3.5
22 Group 2 2025-07-08 1 34 5 [Timestamp('2025-07-01 00:00:00'), Timestamp('2025-06-24 00:00:00'), Timestamp('2025-06-17 00:00:00')] 31.0 3.0
23 Group 1 2025-07-09 2 46 4 [Timestamp('2025-07-02 00:00:00'), Timestamp('2025-06-25 00:00:00'), Timestamp('2025-06-18 00:00:00')] 62.0 8.0
24 Group 2 2025-07-09 2 36 2 [Timestamp('2025-07-02 00:00:00'), Timestamp('2025-06-25 00:00:00'), Timestamp('2025-06-18 00:00:00')] 30.0 6.0
25 Group 1 2025-07-10 3 55 7 [Timestamp('2025-07-03 00:00:00'), Timestamp('2025-06-26 00:00:00'), Timestamp('2025-06-19 00:00:00')] 65.0 8.5
26 Group 2 2025-07-10 3 34 1 [Timestamp('2025-07-03 00:00:00'), Timestamp('2025-06-26 00:00:00'), Timestamp('2025-06-19 00:00:00')] 31.0 4.5
27 Group 1 2025-07-11 4 29 4 [Timestamp('2025-07-04 00:00:00'), Timestamp('2025-06-27 00:00:00'), Timestamp('2025-06-20 00:00:00')] 39.0 5.0
28 Group 2 2025-07-11 4 40 5 [Timestamp('2025-07-04 00:00:00'), Timestamp('2025-06-27 00:00:00'), Timestamp('2025-06-20 00:00:00')] 39.5 6.5
29 Group 2 2025-07-12 5 8 1 [Timestamp('2025-07-05 00:00:00'), Timestamp('2025-06-28 00:00:00'), Timestamp('2025-06-21 00:00:00')] 8.0 1.5
30 Group 1 2025-07-14 0 62 3 [Timestamp('2025-07-07 00:00:00'), Timestamp('2025-06-30 00:00:00'), Timestamp('2025-06-23 00:00:00')] 52.5 6.5
31 Group 2 2025-07-14 0 49 4 [Timestamp('2025-07-07 00:00:00'), Timestamp('2025-06-30 00:00:00'), Timestamp('2025-06-23 00:00:00')] 40.5 5.5
32 Group 1 2025-07-15 1 26 0 [Timestamp('2025-07-08 00:00:00'), Timestamp('2025-07-01 00:00:00'), Timestamp('2025-06-24 00:00:00')] 33.666666666666664 4.333333333333333
33 Group 2 2025-07-15 1 30 6 [Timestamp('2025-07-08 00:00:00'), Timestamp('2025-07-01 00:00:00'), Timestamp('2025-06-24 00:00:00')] 32.5 4.0
34 Group 1 2025-07-16 2 51 4 [Timestamp('2025-07-09 00:00:00'), Timestamp('2025-07-02 00:00:00'), Timestamp('2025-06-25 00:00:00')] 56.666666666666664 6.666666666666667
35 Group 2 2025-07-16 2 31 1 [Timestamp('2025-07-09 00:00:00'), Timestamp('2025-07-02 00:00:00'), Timestamp('2025-06-25 00:00:00')] 33.0 4.0
36 Group 1 2025-07-17 3 57 7 [Timestamp('2025-07-10 00:00:00'), Timestamp('2025-07-03 00:00:00'), Timestamp('2025-06-26 00:00:00')] 61.666666666666664 8.0
37 Group 2 2025-07-17 3 36 1 [Timestamp('2025-07-10 00:00:00'), Timestamp('2025-07-03 00:00:00'), Timestamp('2025-06-26 00:00:00')] 32.0 3.3333333333333335
38 Group 1 2025-07-18 4 40 8 [Timestamp('2025-07-11 00:00:00'), Timestamp('2025-07-04 00:00:00'), Timestamp('2025-06-27 00:00:00')] 35.666666666666664 4.666666666666667
39 Group 2 2025-07-19 5 11 0 [Timestamp('2025-07-12 00:00:00'), Timestamp('2025-07-05 00:00:00'), Timestamp('2025-06-28 00:00:00')] 8.0 1.3333333333333333
40 Group 1 2025-07-21 0 65 4 [Timestamp('2025-07-14 00:00:00'), Timestamp('2025-07-07 00:00:00'), Timestamp('2025-06-30 00:00:00')] 61.0 4.5
41 Group 2 2025-07-21 0 37 9 [Timestamp('2025-07-14 00:00:00'), Timestamp('2025-07-07 00:00:00'), Timestamp('2025-06-30 00:00:00')] 43.333333333333336 5.0
42 Group 1 2025-07-22 1 50 4 [Timestamp('2025-07-15 00:00:00'), Timestamp('2025-07-08 00:00:00'), Timestamp('2025-07-01 00:00:00')] 33.333333333333336 3.0
43 Group 2 2025-07-22 1 34 4 [Timestamp('2025-07-15 00:00:00'), Timestamp('2025-07-08 00:00:00'), Timestamp('2025-07-01 00:00:00')] 31.666666666666668 4.666666666666667
44 Group 1 2025-07-23 2 38 3 [Timestamp('2025-07-16 00:00:00'), Timestamp('2025-07-09 00:00:00'), Timestamp('2025-07-02 00:00:00')] 54.333333333333336 6.333333333333333
45 Group 2 2025-07-23 2 35 2 [Timestamp('2025-07-16 00:00:00'), Timestamp('2025-07-09 00:00:00'), Timestamp('2025-07-02 00:00:00')] 32.333333333333336 3.0
46 Group 1 2025-07-24 3 70 1 [Timestamp('2025-07-17 00:00:00'), Timestamp('2025-07-10 00:00:00'), Timestamp('2025-07-03 00:00:00')] 57.666666666666664 8.0
47 Group 2 2025-07-24 3 25 5 [Timestamp('2025-07-17 00:00:00'), Timestamp('2025-07-10 00:00:00'), Timestamp('2025-07-03 00:00:00')] 33.0 1.0
48 Group 1 2025-07-25 4 27 4 [Timestamp('2025-07-18 00:00:00'), Timestamp('2025-07-11 00:00:00'), Timestamp('2025-07-04 00:00:00')] 35.0 6.0
49 Group 2 2025-07-25 4 42 7 [Timestamp('2025-07-18 00:00:00'), Timestamp('2025-07-11 00:00:00'), Timestamp('2025-07-04 00:00:00')] 40.5 4.5
21 июля 2025 г., Группа 1 — имеет только 2 из последних 3 недель (06-30 и 07-14), а 07-07 отсутствует. Однако есть 06-23, который присутствует в данных за тот же день недели, но не входит в последние 3 недели.
25 июля 2025 г., Группа 2 — содержит только 2 из последних 3 недель (07-11 и 07-04), а 07-18 отсутствует. Однако есть 06-27, который присутствует в данных за тот же день недели, но не входит в последние 3 недели.
Подробнее здесь: https://stackoverflow.com/questions/798 ... -within-gr
Мобильная версия