Record_ID,Event_ID,Policy No,Claim Name,Client_number's_PO,Claim No,Life_Assured,Client_number's_LA,Received Date,Incur Date,Product_code,Claim_type,Benefit/component_code,,
12,40,80063166,Tr????ng An Diå÷??m,10003205,1,1,10053675,29/11/2019,10/11/2019,Rider03,RC,Rider03_04,,
1,126,80063166,Tr????ng An Diå÷??m,10003205,2,2,10003205,15/05/2020,4/05/2020,Rider03,RC,Rider03_04,,
19,188,80063166,Tr????ng An Diå÷??m,10003205,5,2,10003205,3/07/2020,6/06/2020,Rider03,RC,Rider03_04,,
11,189,80063166,Tr????ng An Diå÷??m,10003205,4,1,10053675,3/07/2020,7/06/2020,Rider03,RC,Rider03_04,,
16,181,80069631,Tr????ng An Diå÷??m,10003205,2,1,10073684,2/07/2020,7/06/2020,Rider03,RC,Rider03_04,,
3,610,80063166,Tr????ng An Diå÷??m,10003205,6,2,10003205,29/10/2020,7/10/2020,Rider03,RC,Rider03_04,,
14,611,80063166,Tr????ng An Diå÷??m,10003205,7,1,10053675,29/10/2020,18/10/2020,Rider03,RC,Rider03_04,,
2,1014,80063166,Tr????ng An Diå÷??m,10003205,8,2,10003205,20/12/2020,26/11/2020,Rider03,RC,Rider03_04,,
17,1015,80069631,Tr????ng An Diå÷??m,10003205,4,1,10073684,20/12/2020,4/12/2020,Rider03,RC,Rider03_04,,
13,3687,80063166,Tr????ng An Diå÷??m,10003205,9,1,10053675,21/09/2021,7/09/2021,Rider03,RC,Rider03_04,,
18,3689,80069631,Tr????ng An Diå÷??m,10003205,5,1,10073684,21/09/2021,7/09/2021,Rider03,RC,Rider03_04,,
20,3690,80069631,Tr????ng An Diå÷??m,10003205,6,1,10073684,21/09/2021,16/09/2021,Rider03,RC,Rider03_04,,
6,5635,80063166,Tr????ng An Diå÷??m,10003205,13,2,10003205,25/12/2021,8/11/2021,Rider03,RC,Rider03_04,,
8,5637,80093241,Tr????ng An Diå÷??m,10003205,1,1,10003213,25/12/2021,8/11/2021,Rider03,RC,Rider03_04,,
43,5634,80063166,Tr????ng An Diå÷??m,10003205,12,3,10003213,25/12/2021,8/11/2021,Rider03,RC,Rider03_04,,
9,7544,80093241,Tr????ng An Diå÷??m,10003205,2,1,10003213,19/04/2022,8/04/2022,Rider03,RC,Rider03_04,,
41,7544,80063166,Tr????ng An Diå÷??m,10003205,15,3,10003213,19/04/2022,8/04/2022,Rider03,RC,Rider03_04,,
7,13004,80063166,Tr????ng An Diå÷??m,10003205,18,2,10003205,21/02/2023,5/01/2023,Rider03,RC,Rider03_04,,
42,13005,80063166,Tr????ng An Diå÷??m,10003205,19,3,10003213,21/02/2023,5/01/2023,Rider03,RC,Rider03_04,,
0,13097,80063166,Tr????ng An Diå÷??m,10003205,21,2,10003205,24/02/2023,6/02/2023,Rider03,RC,Rider03_04,,
10,12881,80093241,Tr????ng An Diå÷??m,10003205,3,1,10003213,14/02/2023,6/02/2023,Rider03,RC,Rider03_04,,
15,12880,80063166,Tr????ng An Diå÷??m,10003205,16,1,10053675,14/02/2023,6/02/2023,Rider03,RC,Rider03_04,,
44,13096,80063166,Tr????ng An Diå÷??m,10003205,20,3,10003213,24/02/2023,6/02/2023,Rider03,RC,Rider03_04,,
4,26741,80063166,Tr????ng An Diå÷??m,10003205,27,2,10003205,6/12/2023,22/10/2023,Rider03,RC,Rider03_04,,
5,24564,80063166,Tr????ng An Diå÷??m,10003205,26,2,10003205,1/11/2023,28/10/2023,Rider03,RC,Rider03_04,,
def flag_date_within_m_months(group, col_label, col_date):
group[col_date] = pd.to_datetime(group[col_date])
group = group.sort_values(by=col_date) # Sort by date
group[col_label] = False
# Create a rolling window to count claims
for i in range(len(group)):
current_date = group[col_date].iloc[i]
window_start = current_date - pd.DateOffset(months=m)
# Count the number of claims within the window
count_within_window = group[(group[col_date] = window_start)].shape[0]
if count_within_window > n:
# Flag all claims in the window
group.loc[(group[col_date] = window_start), col_label] = True
print(f"Flagging claims between {window_start} and {current_date}") # Debug output
return group
Однако, когда я использую его следующим образом, я не получаю ожидаемых результатов. Цель следующего блока кода — пометить строки, в которых клиент («Client_number's_PO») подает более [n] претензий на одно и то же страховое возмещение/компонент («Benefit/comComponent_code») в течение [m] месяцев (понесенные /дата события) При проверке вручную есть много строк, которые должны быть помечены как True, но когда я запускаю это, все строки имеют значение False.:
# Control parameters
n = 5
m = 6
# Create an index so that every row with the same 'Client_number's_PO' and 'Benefit/component_code' has the same index number.
claim_data["1. Index"] = claim_data_m.groupby(["Client_number's_PO", "Benefit/component_code"]).ngroup()
# Flag the rows based on the condition
claim_data["1"] = False
claim_data.groupby("1. Index").apply(flag_date_within_m_months, col_label="1", col_date="Incur Date")
Пожалуйста, помогите мне это исправить.
Я попробовал то, что описал выше, и ожидаю, что за m месяцев появятся строки с более чем n вхождениями с те же «Client_number's_PO» и «Benefit/comComponent_code» будут помечены как True в столбце «1». Например, для строк с Client_number's_PO 10003205 и Benefit/comComponent_code Rider03_04 я ожидаю, что результаты будут такими (ИСТИНА, если в течение 6 месяцев поступило более 5 претензий на основе «Даты возникновения»):
Record_ID,Event_ID,Policy No,Claim Name,Client_number's_PO,Claim No,Life_Assured,Client_number's_LA,Received Date,Incur Date,Product_code,Claim_type,Benefit/component_code,1,
12,40,80063166,Tr????ng An Diå÷??m,10003205,1,1,10053675,29/11/2019,10/11/2019,Rider03,RC,Rider03_04,FALSE,
1,126,80063166,Tr????ng An Diå÷??m,10003205,2,2,10003205,15/05/2020,4/05/2020,Rider03,RC,Rider03_04,TRUE,
19,188,80063166,Tr????ng An Diå÷??m,10003205,5,2,10003205,3/07/2020,6/06/2020,Rider03,RC,Rider03_04,TRUE,
11,189,80063166,Tr????ng An Diå÷??m,10003205,4,1,10053675,3/07/2020,7/06/2020,Rider03,RC,Rider03_04,TRUE,
16,181,80069631,Tr????ng An Diå÷??m,10003205,2,1,10073684,2/07/2020,7/06/2020,Rider03,RC,Rider03_04,TRUE,
3,610,80063166,Tr????ng An Diå÷??m,10003205,6,2,10003205,29/10/2020,7/10/2020,Rider03,RC,Rider03_04,TRUE,
14,611,80063166,Tr????ng An Diå÷??m,10003205,7,1,10053675,29/10/2020,18/10/2020,Rider03,RC,Rider03_04,TRUE,
2,1014,80063166,Tr????ng An Diå÷??m,10003205,8,2,10003205,20/12/2020,26/11/2020,Rider03,RC,Rider03_04,TRUE,
17,1015,80069631,Tr????ng An Diå÷??m,10003205,4,1,10073684,20/12/2020,4/12/2020,Rider03,RC,Rider03_04,TRUE,
13,3687,80063166,Tr????ng An Diå÷??m,10003205,9,1,10053675,21/09/2021,7/09/2021,Rider03,RC,Rider03_04,TRUE,
18,3689,80069631,Tr????ng An Diå÷??m,10003205,5,1,10073684,21/09/2021,7/09/2021,Rider03,RC,Rider03_04,TRUE,
20,3690,80069631,Tr????ng An Diå÷??m,10003205,6,1,10073684,21/09/2021,16/09/2021,Rider03,RC,Rider03_04,TRUE,
6,5635,80063166,Tr????ng An Diå÷??m,10003205,13,2,10003205,25/12/2021,8/11/2021,Rider03,RC,Rider03_04,TRUE,
8,5637,80093241,Tr????ng An Diå÷??m,10003205,1,1,10003213,25/12/2021,8/11/2021,Rider03,RC,Rider03_04,TRUE,
43,5634,80063166,Tr????ng An Diå÷??m,10003205,12,3,10003213,25/12/2021,8/11/2021,Rider03,RC,Rider03_04,TRUE,
9,7544,80093241,Tr????ng An Diå÷??m,10003205,2,1,10003213,19/04/2022,8/04/2022,Rider03,RC,Rider03_04,FALSE,
41,7544,80063166,Tr????ng An Diå÷??m,10003205,15,3,10003213,19/04/2022,8/04/2022,Rider03,RC,Rider03_04,FALSE,
7,13004,80063166,Tr????ng An Diå÷??m,10003205,18,2,10003205,21/02/2023,5/01/2023,Rider03,RC,Rider03_04,TRUE,
42,13005,80063166,Tr????ng An Diå÷??m,10003205,19,3,10003213,21/02/2023,5/01/2023,Rider03,RC,Rider03_04,TRUE,
0,13097,80063166,Tr????ng An Diå÷??m,10003205,21,2,10003205,24/02/2023,6/02/2023,Rider03,RC,Rider03_04,TRUE,
10,12881,80093241,Tr????ng An Diå÷??m,10003205,3,1,10003213,14/02/2023,6/02/2023,Rider03,RC,Rider03_04,TRUE,
15,12880,80063166,Tr????ng An Diå÷??m,10003205,16,1,10053675,14/02/2023,6/02/2023,Rider03,RC,Rider03_04,TRUE,
44,13096,80063166,Tr????ng An Diå÷??m,10003205,20,3,10003213,24/02/2023,6/02/2023,Rider03,RC,Rider03_04,TRUE,
4,26741,80063166,Tr????ng An Diå÷??m,10003205,27,2,10003205,6/12/2023,22/10/2023,Rider03,RC,Rider03_04,FALSE,
5,24564,80063166,Tr????ng An Diå÷??m,10003205,26,2,10003205,1/11/2023,28/10/2023,Rider03,RC,Rider03_04,FALSE,
У меня есть входная таблица, которую я назвал «claim_data». Выглядит это так: [code]Record_ID,Event_ID,Policy No,Claim Name,Client_number's_PO,Claim No,Life_Assured,Client_number's_LA,Received Date,Incur Date,Product_code,Claim_type,Benefit/component_code,, 12,40,80063166,Tr????ng An Diå÷??m,10003205,1,1,10053675,29/11/2019,10/11/2019,Rider03,RC,Rider03_04,, 1,126,80063166,Tr????ng An Diå÷??m,10003205,2,2,10003205,15/05/2020,4/05/2020,Rider03,RC,Rider03_04,, 19,188,80063166,Tr????ng An Diå÷??m,10003205,5,2,10003205,3/07/2020,6/06/2020,Rider03,RC,Rider03_04,, 11,189,80063166,Tr????ng An Diå÷??m,10003205,4,1,10053675,3/07/2020,7/06/2020,Rider03,RC,Rider03_04,, 16,181,80069631,Tr????ng An Diå÷??m,10003205,2,1,10073684,2/07/2020,7/06/2020,Rider03,RC,Rider03_04,, 3,610,80063166,Tr????ng An Diå÷??m,10003205,6,2,10003205,29/10/2020,7/10/2020,Rider03,RC,Rider03_04,, 14,611,80063166,Tr????ng An Diå÷??m,10003205,7,1,10053675,29/10/2020,18/10/2020,Rider03,RC,Rider03_04,, 2,1014,80063166,Tr????ng An Diå÷??m,10003205,8,2,10003205,20/12/2020,26/11/2020,Rider03,RC,Rider03_04,, 17,1015,80069631,Tr????ng An Diå÷??m,10003205,4,1,10073684,20/12/2020,4/12/2020,Rider03,RC,Rider03_04,, 13,3687,80063166,Tr????ng An Diå÷??m,10003205,9,1,10053675,21/09/2021,7/09/2021,Rider03,RC,Rider03_04,, 18,3689,80069631,Tr????ng An Diå÷??m,10003205,5,1,10073684,21/09/2021,7/09/2021,Rider03,RC,Rider03_04,, 20,3690,80069631,Tr????ng An Diå÷??m,10003205,6,1,10073684,21/09/2021,16/09/2021,Rider03,RC,Rider03_04,, 6,5635,80063166,Tr????ng An Diå÷??m,10003205,13,2,10003205,25/12/2021,8/11/2021,Rider03,RC,Rider03_04,, 8,5637,80093241,Tr????ng An Diå÷??m,10003205,1,1,10003213,25/12/2021,8/11/2021,Rider03,RC,Rider03_04,, 43,5634,80063166,Tr????ng An Diå÷??m,10003205,12,3,10003213,25/12/2021,8/11/2021,Rider03,RC,Rider03_04,, 9,7544,80093241,Tr????ng An Diå÷??m,10003205,2,1,10003213,19/04/2022,8/04/2022,Rider03,RC,Rider03_04,, 41,7544,80063166,Tr????ng An Diå÷??m,10003205,15,3,10003213,19/04/2022,8/04/2022,Rider03,RC,Rider03_04,, 7,13004,80063166,Tr????ng An Diå÷??m,10003205,18,2,10003205,21/02/2023,5/01/2023,Rider03,RC,Rider03_04,, 42,13005,80063166,Tr????ng An Diå÷??m,10003205,19,3,10003213,21/02/2023,5/01/2023,Rider03,RC,Rider03_04,, 0,13097,80063166,Tr????ng An Diå÷??m,10003205,21,2,10003205,24/02/2023,6/02/2023,Rider03,RC,Rider03_04,, 10,12881,80093241,Tr????ng An Diå÷??m,10003205,3,1,10003213,14/02/2023,6/02/2023,Rider03,RC,Rider03_04,, 15,12880,80063166,Tr????ng An Diå÷??m,10003205,16,1,10053675,14/02/2023,6/02/2023,Rider03,RC,Rider03_04,, 44,13096,80063166,Tr????ng An Diå÷??m,10003205,20,3,10003213,24/02/2023,6/02/2023,Rider03,RC,Rider03_04,, 4,26741,80063166,Tr????ng An Diå÷??m,10003205,27,2,10003205,6/12/2023,22/10/2023,Rider03,RC,Rider03_04,, 5,24564,80063166,Tr????ng An Diå÷??m,10003205,26,2,10003205,1/11/2023,28/10/2023,Rider03,RC,Rider03_04,, [/code] claim_data Вот как выглядит моя функция. [code]def flag_date_within_m_months(group, col_label, col_date): group[col_date] = pd.to_datetime(group[col_date]) group = group.sort_values(by=col_date) # Sort by date group[col_label] = False
# Create a rolling window to count claims for i in range(len(group)): current_date = group[col_date].iloc[i] window_start = current_date - pd.DateOffset(months=m)
# Count the number of claims within the window count_within_window = group[(group[col_date] = window_start)].shape[0]
if count_within_window > n: # Flag all claims in the window group.loc[(group[col_date] = window_start), col_label] = True print(f"Flagging claims between {window_start} and {current_date}") # Debug output
return group [/code] Однако, когда я использую его следующим образом, я не получаю ожидаемых результатов. Цель следующего блока кода — пометить строки, в которых клиент («Client_number's_PO») подает более [n] претензий на одно и то же страховое возмещение/компонент («Benefit/comComponent_code») в течение [m] месяцев (понесенные /дата события) При проверке вручную есть много строк, которые должны быть помечены как True, но когда я запускаю это, все строки имеют значение False.: [code]# Control parameters n = 5 m = 6
# Create an index so that every row with the same 'Client_number's_PO' and 'Benefit/component_code' has the same index number. claim_data["1. Index"] = claim_data_m.groupby(["Client_number's_PO", "Benefit/component_code"]).ngroup()
# Flag the rows based on the condition claim_data["1"] = False claim_data.groupby("1. Index").apply(flag_date_within_m_months, col_label="1", col_date="Incur Date") [/code] Пожалуйста, помогите мне это исправить. Я попробовал то, что описал выше, и ожидаю, что за m месяцев появятся строки с более чем n вхождениями с те же «Client_number's_PO» и «Benefit/comComponent_code» будут помечены как True в столбце «1». Например, для строк с Client_number's_PO 10003205 и Benefit/comComponent_code Rider03_04 я ожидаю, что результаты будут такими (ИСТИНА, если в течение 6 месяцев поступило более 5 претензий на основе «Даты возникновения»): [code]Record_ID,Event_ID,Policy No,Claim Name,Client_number's_PO,Claim No,Life_Assured,Client_number's_LA,Received Date,Incur Date,Product_code,Claim_type,Benefit/component_code,1, 12,40,80063166,Tr????ng An Diå÷??m,10003205,1,1,10053675,29/11/2019,10/11/2019,Rider03,RC,Rider03_04,FALSE, 1,126,80063166,Tr????ng An Diå÷??m,10003205,2,2,10003205,15/05/2020,4/05/2020,Rider03,RC,Rider03_04,TRUE, 19,188,80063166,Tr????ng An Diå÷??m,10003205,5,2,10003205,3/07/2020,6/06/2020,Rider03,RC,Rider03_04,TRUE, 11,189,80063166,Tr????ng An Diå÷??m,10003205,4,1,10053675,3/07/2020,7/06/2020,Rider03,RC,Rider03_04,TRUE, 16,181,80069631,Tr????ng An Diå÷??m,10003205,2,1,10073684,2/07/2020,7/06/2020,Rider03,RC,Rider03_04,TRUE, 3,610,80063166,Tr????ng An Diå÷??m,10003205,6,2,10003205,29/10/2020,7/10/2020,Rider03,RC,Rider03_04,TRUE, 14,611,80063166,Tr????ng An Diå÷??m,10003205,7,1,10053675,29/10/2020,18/10/2020,Rider03,RC,Rider03_04,TRUE, 2,1014,80063166,Tr????ng An Diå÷??m,10003205,8,2,10003205,20/12/2020,26/11/2020,Rider03,RC,Rider03_04,TRUE, 17,1015,80069631,Tr????ng An Diå÷??m,10003205,4,1,10073684,20/12/2020,4/12/2020,Rider03,RC,Rider03_04,TRUE, 13,3687,80063166,Tr????ng An Diå÷??m,10003205,9,1,10053675,21/09/2021,7/09/2021,Rider03,RC,Rider03_04,TRUE, 18,3689,80069631,Tr????ng An Diå÷??m,10003205,5,1,10073684,21/09/2021,7/09/2021,Rider03,RC,Rider03_04,TRUE, 20,3690,80069631,Tr????ng An Diå÷??m,10003205,6,1,10073684,21/09/2021,16/09/2021,Rider03,RC,Rider03_04,TRUE, 6,5635,80063166,Tr????ng An Diå÷??m,10003205,13,2,10003205,25/12/2021,8/11/2021,Rider03,RC,Rider03_04,TRUE, 8,5637,80093241,Tr????ng An Diå÷??m,10003205,1,1,10003213,25/12/2021,8/11/2021,Rider03,RC,Rider03_04,TRUE, 43,5634,80063166,Tr????ng An Diå÷??m,10003205,12,3,10003213,25/12/2021,8/11/2021,Rider03,RC,Rider03_04,TRUE, 9,7544,80093241,Tr????ng An Diå÷??m,10003205,2,1,10003213,19/04/2022,8/04/2022,Rider03,RC,Rider03_04,FALSE, 41,7544,80063166,Tr????ng An Diå÷??m,10003205,15,3,10003213,19/04/2022,8/04/2022,Rider03,RC,Rider03_04,FALSE, 7,13004,80063166,Tr????ng An Diå÷??m,10003205,18,2,10003205,21/02/2023,5/01/2023,Rider03,RC,Rider03_04,TRUE, 42,13005,80063166,Tr????ng An Diå÷??m,10003205,19,3,10003213,21/02/2023,5/01/2023,Rider03,RC,Rider03_04,TRUE, 0,13097,80063166,Tr????ng An Diå÷??m,10003205,21,2,10003205,24/02/2023,6/02/2023,Rider03,RC,Rider03_04,TRUE, 10,12881,80093241,Tr????ng An Diå÷??m,10003205,3,1,10003213,14/02/2023,6/02/2023,Rider03,RC,Rider03_04,TRUE, 15,12880,80063166,Tr????ng An Diå÷??m,10003205,16,1,10053675,14/02/2023,6/02/2023,Rider03,RC,Rider03_04,TRUE, 44,13096,80063166,Tr????ng An Diå÷??m,10003205,20,3,10003213,24/02/2023,6/02/2023,Rider03,RC,Rider03_04,TRUE, 4,26741,80063166,Tr????ng An Diå÷??m,10003205,27,2,10003205,6/12/2023,22/10/2023,Rider03,RC,Rider03_04,FALSE, 5,24564,80063166,Tr????ng An Diå÷??m,10003205,26,2,10003205,1/11/2023,28/10/2023,Rider03,RC,Rider03_04,FALSE, [/code] ожидаемые результаты