Я строю график зависимости скорости от времени, но из-за нестабильных результатов оптического распознавания символов (OCR) на графике появляются случайные точки-выбросы.
Код: Выделить всё
def remove_outliers(
    df: pd.DataFrame,
    *,
    thresholds: "dict[str, float] | None" = None,
) -> pd.DataFrame:
    """
    Blank out obvious outliers in the DataFrame.

    Every value that is strictly greater than its column's upper limit is
    replaced with a missing value (NaN). Values equal to the limit, and
    values that are already missing, are left untouched.

    The DataFrame is modified in place and the same object is returned.

    Args:
        df (pd.DataFrame): The DataFrame to clean. It must contain every
            column named in the threshold table.
        thresholds: Optional mapping of column name -> upper limit. When
            given, it is used instead of the built-in example limits, so
            only the columns it names are checked.

    Returns:
        pd.DataFrame: The same DataFrame, with outliers blanked out.

    Raises:
        KeyError: If a column named in the threshold table is not in ``df``.
    """
    if thresholds is None:
        # Built-in example limits; anything above these is treated as a
        # misread value.
        thresholds = {
            'superheavy_speed': 6000,     # Example threshold for Superheavy speed
            'superheavy_altitude': 100,   # Example threshold for Superheavy altitude
            'starship_speed': 30000,      # Example threshold for Starship speed
            'starship_altitude': 200,     # Example threshold for Starship altitude
        }

    for column, upper_limit in thresholds.items():
        values = df[column]
        # Series.mask builds a new column with NaN where the condition holds.
        # Unlike assigning None through .loc, this upcasts integer columns to
        # float cleanly instead of relying on an incompatible-dtype setitem,
        # which newer pandas versions warn about (or reject).
        df[column] = values.mask(values > upper_limit)

    return df
def delete_outliers_within_window(df: pd.DataFrame, window_size: int = 5) -> pd.DataFrame:
"""
Delete outliers within a rolling window from the DataFrame.
Args:
df (pd.DataFrame): The DataFrame to clean.
window_size (int): The window size for the rolling operations.
Returns:
pd.DataFrame: The cleaned DataFrame.
"""
for column in ['superheavy_speed', 'superheavy_altitude', 'starship_speed', 'starship_altitude']:
rolling_median = df[column].rolling(window=window_size, center=True).median()
rolling_std = df[column].rolling(window=window_size, center=True).std()
df = df[(df[column] >= (rolling_median - 2 * rolling_std)) & (df[column]
Подробнее здесь: [url]https://stackoverflow.com/questions/79275086/clearing-outliers-in-python-using-pandas[/url]