Я работаю над сценарием Python, использующим Pandas для анализа данных о событиях. Моя цель — вычислить пересечение активных событий.
Мой код работает нормально, если одно и то же событие не происходит дважды. Однако если это произойдет дважды, мой код вернет пустые кадры данных.
Вот дефектный код:
import pandas as pd
def is_active(df, event, start_date, end_date):
"""Filters events that are active within a time range."""
filter = (df['event_name'] == event) & (
(df['start_date'] = start_date)
)
return df[filter].shape[0] > 0
def is_not_active(df, event, start_date, end_date):
"""Filters events that are inactive within a time range."""
filter = (df['event_name'] == event) & (
(df['start_date'] = start_date)
)
return df[filter].empty
def generate_active_intersection(df_events, active_events, inactive_events, combination):
"""Generates a DataFrame with active events and filters inactive ones."""
# Define initial intersection range as the maximum start_date and minimum end_date among active events
df_filtered = df_events[df_events['event_name'].isin(active_events)]
if df_filtered.empty:
return pd.DataFrame() # No common active events
max_start_date = df_filtered['start_date'].max()
min_end_date = df_filtered['end_date'].min()
# Ensure the time range is valid
if max_start_date > min_end_date:
return pd.DataFrame() # No overlap of all active events
# Verify that there is a temporal intersection among all active events
for event in active_events:
if not is_active(df_events, event, max_start_date, min_end_date):
return pd.DataFrame() # No overlap of all active events
# Verify that inactive events are NOT active in the same time range
for inactive_event in inactive_events:
event_without_no = inactive_event.replace("NO_", "")
if not is_not_active(df_events, event_without_no, max_start_date, min_end_date):
return pd.DataFrame() # Some event that should be inactive is active
# Calculate active time in seconds
active_time_seconds = (min_end_date - max_start_date).total_seconds()
# If all conditions are met, return the common time range with the catalog format
return pd.DataFrame({
'start_date': [max_start_date],
'end_date': [min_end_date],
'catalog': [combination], # Use the original catalog combination
'active_time_seconds': [active_time_seconds]
})
def process_catalog(df_events, df_catalog):
"""Processes each catalog combination and generates the corresponding DataFrames."""
results = []
for index, row in df_catalog.iterrows():
combination = row['catalog']
events = combination.split(', ')
active_events = [e for e in events if not e.startswith('NO_')]
inactive_events = [e for e in events if e.startswith('NO_')]
df_result = generate_active_intersection(df_events, active_events, inactive_events, combination)
if not df_result.empty:
results.append(df_result)
if results:
return pd.concat(results, ignore_index=True)
else:
return pd.DataFrame(columns=['start_date', 'end_date', 'catalog', 'active_time_seconds']) # No valid results
# Example DataFrames:
events = pd.DataFrame({
'event_name': ['C', 'A', 'B', 'D', 'E', 'F'],
'start_date': pd.to_datetime([
'2023-10-01 09:45:00',
'2023-10-01 12:00:00',
'2023-10-02 14:30:00',
'2023-10-04 16:00:00',
'2023-10-05 18:15:00',
'2023-10-05 18:20:00'
]),
'end_date': pd.to_datetime([
'2023-10-03 11:30:00',
'2023-10-05 18:00:00',
'2023-10-06 23:59:59',
'2023-10-07 08:45:00',
'2023-10-08 20:00:00',
'2023-10-08 10:00:00',
])
})
candidates = pd.DataFrame({
'catalog': ['A, B, C', 'B, C, D', 'A, NO_E, C']
})
# Process and get the results
df_results = process_catalog(events, candidates)
print(f"Catalog results with start_time, end_time, name and active time in seconds: \n", df_results, "\n")
Код работает нормально и так, но если вы замените последнюю букву «F» на «A» в «event_name», все завершится неудачно, и функция вернет пустой фрейм данных как результат.
Я работаю над сценарием Python, использующим Pandas для анализа данных о событиях. Моя цель — вычислить пересечение активных событий. Мой код работает нормально, если одно и то же событие не происходит дважды. Однако если это произойдет дважды, мой код вернет пустые кадры данных. Вот дефектный код: [code]import pandas as pd def is_active(df, event, start_date, end_date): """Filters events that are active within a time range.""" filter = (df['event_name'] == event) & ( (df['start_date'] = start_date) ) return df[filter].shape[0] > 0
def is_not_active(df, event, start_date, end_date): """Filters events that are inactive within a time range.""" filter = (df['event_name'] == event) & ( (df['start_date'] = start_date) ) return df[filter].empty
def generate_active_intersection(df_events, active_events, inactive_events, combination): """Generates a DataFrame with active events and filters inactive ones.""" # Define initial intersection range as the maximum start_date and minimum end_date among active events df_filtered = df_events[df_events['event_name'].isin(active_events)]
if df_filtered.empty: return pd.DataFrame() # No common active events
# Ensure the time range is valid if max_start_date > min_end_date: return pd.DataFrame() # No overlap of all active events
# Verify that there is a temporal intersection among all active events for event in active_events: if not is_active(df_events, event, max_start_date, min_end_date): return pd.DataFrame() # No overlap of all active events
# Verify that inactive events are NOT active in the same time range for inactive_event in inactive_events: event_without_no = inactive_event.replace("NO_", "") if not is_not_active(df_events, event_without_no, max_start_date, min_end_date): return pd.DataFrame() # Some event that should be inactive is active
# Calculate active time in seconds active_time_seconds = (min_end_date - max_start_date).total_seconds()
# If all conditions are met, return the common time range with the catalog format return pd.DataFrame({ 'start_date': [max_start_date], 'end_date': [min_end_date], 'catalog': [combination], # Use the original catalog combination 'active_time_seconds': [active_time_seconds] })
def process_catalog(df_events, df_catalog): """Processes each catalog combination and generates the corresponding DataFrames.""" results = []
for index, row in df_catalog.iterrows(): combination = row['catalog'] events = combination.split(', ')
active_events = [e for e in events if not e.startswith('NO_')] inactive_events = [e for e in events if e.startswith('NO_')]
if results: return pd.concat(results, ignore_index=True) else: return pd.DataFrame(columns=['start_date', 'end_date', 'catalog', 'active_time_seconds']) # No valid results [/code] Пример данных находится здесь: [code]# Example DataFrames: events = pd.DataFrame({ 'event_name': ['C', 'A', 'B', 'D', 'E', 'F'], 'start_date': pd.to_datetime([ '2023-10-01 09:45:00', '2023-10-01 12:00:00', '2023-10-02 14:30:00', '2023-10-04 16:00:00', '2023-10-05 18:15:00', '2023-10-05 18:20:00' ]), 'end_date': pd.to_datetime([ '2023-10-03 11:30:00', '2023-10-05 18:00:00', '2023-10-06 23:59:59', '2023-10-07 08:45:00', '2023-10-08 20:00:00', '2023-10-08 10:00:00', ]) }) candidates = pd.DataFrame({ 'catalog': ['A, B, C', 'B, C, D', 'A, NO_E, C'] }) # Process and get the results df_results = process_catalog(events, candidates) print(f"Catalog results with start_time, end_time, name and active time in seconds: \n", df_results, "\n") [/code] Код работает нормально и так, но если вы замените последнюю букву «F» на «A» в «event_name», все завершится неудачно, и функция вернет пустой фрейм данных как результат.
Я работаю над сценарием Python, использующим Pandas для анализа данных о событиях. Моя цель — вычислить пересечение активных событий.
Мой код работает нормально, если одно и то же событие не происходит дважды. Однако если это произойдет дважды, мой...
Предположим, у нас есть две карты с некоторыми общими ключами, но разными значениями. Я хотел бы перебрать «пересечение» двух карт, где ключи одинаковы. Затем я хотел бы выполнить функцию преобразования f(key, value_in_map1, value_in_map2).