Вот воспроизводимый пример:
Код: Выделить всё
import seaborn as sns
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
# Reproducible example: grid-search a pipeline made of preprocessing,
# forward sequential feature selection and logistic regression on the
# Titanic data.

# --- Data ---------------------------------------------------------------
# Titanic example dataset obtained through seaborn.
titanic = sns.load_dataset("titanic")

# Two numeric predictors and one categorical predictor; the target is the
# "survived" column.
numeric_features = ["age", "fare"]
categorical_features = ["sex"]
features = numeric_features + categorical_features
X = titanic[features]
y = titanic["survived"]

# --- Preprocessing ------------------------------------------------------
# Numeric columns: fill missing values with a constant, then standardize.
# No fill_value is given, so the imputer falls back to its library default
# for the "constant" strategy.
numeric_steps = [
    ("imputer", SimpleImputer(strategy="constant")),
    ("scaler", StandardScaler()),
]
numeric_transformer = Pipeline(steps=numeric_steps)

# Categorical columns: same constant imputation, then one-hot encoding with
# the first category dropped.
categorical_steps = [
    ("imputer", SimpleImputer(strategy="constant")),
    ("onehot", OneHotEncoder(drop="first")),
]
categorical_transformer = Pipeline(steps=categorical_steps)

# Route each group of columns through its own transformer.
column_routes = [
    ("num", numeric_transformer, numeric_features),
    ("cat", categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_routes)

# --- Model --------------------------------------------------------------
# The same LogisticRegression object is handed to the selector (as the
# estimator it scores candidate feature subsets with) and used as the final
# pipeline step.
clf = LogisticRegression(solver="liblinear", max_iter=1000)
sfs = SequentialFeatureSelector(clf, direction="forward")

# Full pipeline: preprocessing -> feature selection -> classification.
# The step names are what the parameter grid keys below refer to.
pipeline_steps = [
    ("preprocessor", preprocessor),
    ("feature_selection", sfs),
    ("classifier", clf),
]
pipeline = Pipeline(steps=pipeline_steps)

# --- Hyper-parameter search ---------------------------------------------
# Keys use the "<step name>__<parameter>" form to address pipeline steps.
param_grid = {
    "feature_selection__n_features_to_select": [2],
    "classifier__C": [0.1, 1.0, 10.0],  # Regularization strength
}

# Exhaustive search over the grid with 5-fold cross-validation.
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5)
grid_search.fit(X, y)

# Report the winning parameter combination and its score.
print("Best parameters found:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)
Код: Выделить всё
X
Код: Выделить всё
feature_selection__n_features_to_select: [2,3]
Проблема здесь в том, что SequentialFeatureSelector не рассматривает выбор всех признаков (так называемый сквозной селектор, passthrough) как допустимый вариант отбора признаков.
Я хотел бы запустить поиск по сетке, который учитывал бы и такой вариант шага:
Код: Выделить всё
('feature_selection', 'passthrough')
Подробнее здесь: https://stackoverflow.com/questions/793 ... to-evaluat