Похожие проблемы, но не решены:
Ошибка при выполнении байесовского метода для поиск лучшего гиперпараметра
При поиске решения я получил руководство здесь: https://medium.com/latinxinai/tuning-de ... 50c1b1d4ba
Код: Выделить всё
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from skopt import BayesSearchCV
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam, RMSprop, Nadam
from scikeras.wrappers import KerasRegressor
from skopt.space import Categorical
# Load dataset
dataset_path = '...'
df = pd.read_excel(dataset_path)
# Split features and target
target_col = 'Y' # Specify the target column
X = df.drop(columns=[target_col])
y = df[target_col]
# Split data into train-test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Callbacks
val_cb_reducelr = tf.keras.callbacks.ReduceLROnPlateau(
monitor='val_loss',
mode='min',
factor=0.01,
patience=10,
verbose=1,
min_lr=0.00001
)
val_cb_earlystop = tf.keras.callbacks.EarlyStopping(
monitor='val_loss',
mode='min',
min_delta=0.001,
patience=15,
verbose=1
)
val_cb_checkpoint = tf.keras.callbacks.ModelCheckpoint(
filepath='checkpoints/model-{epoch:02d}-{val_loss:.2f}.keras',
monitor='val_loss',
mode='min',
save_best_only=True,
verbose=1
)
cb_csvlogger = tf.keras.callbacks.CSVLogger(
filename='training_log.csv',
separator=',',
append=False
)
# Constants
scoring = 'neg_mean_absolute_error'
kfold = KFold(n_splits=5, random_state=42, shuffle=True)
n_iter = 2
epochs = 100
# Model parameters
max_layers = 5
output_layer = Dense(1)
# Define input shape
input_shape = (X_train.shape[1],)
# Function to build search space
def build_search_space():
search_space = {
'lr': np.arange(0.001, 0.0501, 0.0001),
'layers': np.arange(1, max_layers + 1),
'batch_size': [16, 32, 64, 128],
'optimizers': [RMSprop, Adam, Nadam]
}
for i in range(1, max_layers + 1):
search_space.update({
f'n_units{i}': np.arange(64, 513, 64),
f'kernel{i}': ['glorot_normal', 'he_normal', 'he_uniform', 'glorot_uniform'],
f'active{i}': ['relu', 'tanh', 'sigmoid'],
f'dropout{i}': np.arange(0.0, 0.8, 0.1),
f'normalizer{i}': [0, 1]
})
return search_space
# Function to build the model
def build_nn_model(lr, layers, batch_size, optimizers, **params):
model = Sequential()
for i in range(1, layers):
if i == 1:
model.add(Dense(params[f'n_units{i}'], activation=params[f'active{i}'],
kernel_initializer=params[f'kernel{i}'], input_shape=input_shape))
else:
model.add(Dense(params[f'n_units{i}'], activation=params[f'active{i}'],
kernel_initializer=params[f'kernel{i}']))
model.add(Dropout(params[f'dropout{i}']))
if params[f'normalizer{i}'] == 1:
model.add(BatchNormalization())
model.add(output_layer)
model.compile(optimizer=optimizers(learning_rate=lr), loss='mae')
return model
# Build search parameters
search_space = build_search_space()
# Wrap model for BayesSearchCV
model = KerasRegressor(build_fn=build_nn_model, verbose=0, epochs=epochs,
callbacks=[val_cb_reducelr, val_cb_earlystop])
# Bayesian optimization
bayes = BayesSearchCV(estimator=model, search_spaces=search_space,
cv=kfold, n_iter=n_iter, scoring=scoring, verbose=1,
random_state=42, n_jobs=-1)
# Fit Bayesian search
bayes.fit(X_train, y_train)
# Build tuned model
tuned_model = Sequential()
for i in range(1, bayes.best_params_['layers']):
if i == 1:
tuned_model.add(Dense(bayes.best_params_[f'n_units{i}'],
activation=bayes.best_params_[f'active{i}'],
kernel_initializer=bayes.best_params_[f'kernel{i}'],
input_shape=input_shape))
else:
tuned_model.add(Dense(bayes.best_params_[f'n_units{i}'],
activation=bayes.best_params_[f'active{i}'],
kernel_initializer=bayes.best_params_[f'kernel{i}']))
tuned_model.add(Dropout(bayes.best_params_[f'dropout{i}']))
if bayes.best_params_[f'normalizer{i}'] == 1:
tuned_model.add(BatchNormalization())
tuned_model.add(output_layer)
tuned_model.compile(loss='mae', metrics=['mae'], optimizer=bayes.best_params_['optimizers'](
learning_rate=bayes.best_params_['lr']))
# Fit tuned model
history = tuned_model.fit(X_train, y_train, epochs=200,
batch_size=bayes.best_params_['batch_size'],
callbacks=[val_cb_checkpoint, val_cb_reducelr, val_cb_earlystop, cb_csvlogger],
validation_data=(X_test, y_test))
# Evaluate tuned model
mae_score = tuned_model.evaluate(X_test, y_test, verbose=0)
print(f"Final Test MAE: {mae_score}")
Код: Выделить всё
---------------------------------------------------------------------------
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Users\Admin\anaconda3\envs\ML_241031\Lib\site-packages\joblib\externals\loky\process_executor.py", line 463, in _process_worker
r = call_item()
^^^^^^^^^^^
File "C:\Users\Admin\anaconda3\envs\ML_241031\Lib\site-packages\joblib\externals\loky\process_executor.py", line 291, in __call__
return self.fn(*self.args, **self.kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Admin\anaconda3\envs\ML_241031\Lib\site-packages\joblib\parallel.py", line 598, in __call__
return [func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Admin\anaconda3\envs\ML_241031\Lib\site-packages\sklearn\utils\parallel.py", line 136, in __call__
return self.function(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Admin\anaconda3\envs\ML_241031\Lib\site-packages\sklearn\model_selection\_validation.py", line 876, in _fit_and_score
estimator = estimator.set_params(**clone(parameters, safe=False))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Admin\anaconda3\envs\ML_241031\Lib\site-packages\scikeras\wrappers.py", line 1175, in set_params
raise ValueError(
ValueError: Invalid parameter active1 for estimator KerasRegressor.
This issue can likely be resolved by setting this parameter in the KerasRegressor constructor:
`KerasRegressor(active1=tanh)`
Check the list of available parameters with `estimator.get_params().keys()`
"""
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
Cell In[3], line 119
114 bayes = BayesSearchCV(estimator=model, search_spaces=search_space,
115 cv=kfold, n_iter=n_iter, scoring=scoring, verbose=1,
116 random_state=42, n_jobs=-1)
118 # Fit Bayesian search
--> 119 bayes.fit(X_train, y_train)
121 # Build tuned model
122 tuned_model = Sequential()
File ~\anaconda3\envs\ML_241031\Lib\site-packages\skopt\searchcv.py:542, in BayesSearchCV.fit(self, X, y, groups, callback, **fit_params)
535 if callable(self.refit):
536 raise ValueError(
537 "BayesSearchCV doesn't support a callable refit, "
538 "as it doesn't define an implicit score to "
539 "optimize"
540 )
--> 542 super().fit(X=X, y=y, groups=groups, **fit_params)
544 # BaseSearchCV never ranked train scores,
545 # but apparently we used to ship this (back-compat)
546 if self.return_train_score:
File ~\anaconda3\envs\ML_241031\Lib\site-packages\sklearn\base.py:1473, in _fit_context..decorator..wrapper(estimator, *args, **kwargs)
1466 estimator._validate_params()
1468 with config_context(
1469 skip_parameter_validation=(
1470 prefer_skip_nested_validation or global_skip_validation
1471 )
1472 ):
-> 1473 return fit_method(estimator, *args, **kwargs)
File ~\anaconda3\envs\ML_241031\Lib\site-packages\sklearn\model_selection\_search.py:1018, in BaseSearchCV.fit(self, X, y, **params)
1012 results = self._format_results(
1013 all_candidate_params, n_splits, all_out, all_more_results
1014 )
1016 return results
-> 1018 self._run_search(evaluate_candidates)
1020 # multimetric is determined here because in the case of a callable
1021 # self.scoring the return type is only known after calling
1022 first_test_score = all_out[0]["test_scores"]
File ~\anaconda3\envs\ML_241031\Lib\site-packages\skopt\searchcv.py:599, in BayesSearchCV._run_search(self, evaluate_candidates)
595 while n_iter > 0:
596 # when n_iter < n_points points left for evaluation
597 n_points_adjusted = min(n_iter, n_points)
--> 599 optim_result, score_name = self._step(
600 search_space,
601 optimizer,
602 score_name,
603 evaluate_candidates,
604 n_points=n_points_adjusted,
605 )
606 n_iter -= n_points
608 if eval_callbacks(callbacks, optim_result):
File ~\anaconda3\envs\ML_241031\Lib\site-packages\skopt\searchcv.py:453, in BayesSearchCV._step(self, search_space, optimizer, score_name, evaluate_candidates, n_points)
450 # make lists into dictionaries
451 params_dict = [point_asdict(search_space, p) for p in params]
--> 453 all_results = evaluate_candidates(params_dict)
455 # if self.scoring is a callable, we have to wait until here
456 # to get the score name
457 if score_name is None:
File ~\anaconda3\envs\ML_241031\Lib\site-packages\sklearn\model_selection\_search.py:964, in BaseSearchCV.fit..evaluate_candidates(candidate_params, cv, more_results)
956 if self.verbose > 0:
957 print(
958 "Fitting {0} folds for each of {1} candidates,"
959 " totalling {2} fits".format(
960 n_splits, n_candidates, n_candidates * n_splits
961 )
962 )
--> 964 out = parallel(
965 delayed(_fit_and_score)(
966 clone(base_estimator),
967 X,
968 y,
969 train=train,
970 test=test,
971 parameters=parameters,
972 split_progress=(split_idx, n_splits),
973 candidate_progress=(cand_idx, n_candidates),
974 **fit_and_score_kwargs,
975 )
976 for (cand_idx, parameters), (split_idx, (train, test)) in product(
977 enumerate(candidate_params),
978 enumerate(cv.split(X, y, **routed_params.splitter.split)),
979 )
980 )
982 if len(out) < 1:
983 raise ValueError(
984 "No fits were performed. "
985 "Was the CV iterator empty? "
986 "Were there no candidates?"
987 )
File ~\anaconda3\envs\ML_241031\Lib\site-packages\sklearn\utils\parallel.py:74, in Parallel.__call__(self, iterable)
69 config = get_config()
70 iterable_with_config = (
71 (_with_config(delayed_func, config), args, kwargs)
72 for delayed_func, args, kwargs in iterable
73 )
---> 74 return super().__call__(iterable_with_config)
File ~\anaconda3\envs\ML_241031\Lib\site-packages\joblib\parallel.py:2007, in Parallel.__call__(self, iterable)
2001 # The first item from the output is blank, but it makes the interpreter
2002 # progress until it enters the Try/Except block of the generator and
2003 # reaches the first `yield` statement. This starts the asynchronous
2004 # dispatch of the tasks to the workers.
2005 next(output)
-> 2007 return output if self.return_generator else list(output)
File ~\anaconda3\envs\ML_241031\Lib\site-packages\joblib\parallel.py:1650, in Parallel._get_outputs(self, iterator, pre_dispatch)
1647 yield
1649 with self._backend.retrieval_context():
-> 1650 yield from self._retrieve()
1652 except GeneratorExit:
1653 # The generator has been garbage collected before being fully
1654 # consumed. This aborts the remaining tasks if possible and warn
1655 # the user if necessary.
1656 self._exception = True
File ~\anaconda3\envs\ML_241031\Lib\site-packages\joblib\parallel.py:1754, in Parallel._retrieve(self)
1747 while self._wait_retrieval():
1748
1749 # If the callback thread of a worker has signaled that its task
1750 # triggered an exception, or if the retrieval loop has raised an
1751 # exception (e.g. `GeneratorExit`), exit the loop and surface the
1752 # worker traceback.
1753 if self._aborting:
-> 1754 self._raise_error_fast()
1755 break
1757 # If the next job is not ready for retrieval yet, we just wait for
1758 # async callbacks to progress.
File ~\anaconda3\envs\ML_241031\Lib\site-packages\joblib\parallel.py:1789, in Parallel._raise_error_fast(self)
1785 # If this error job exists, immediately raise the error by
1786 # calling get_result. This job might not exists if abort has been
1787 # called directly or if the generator is gc'ed.
1788 if error_job is not None:
-> 1789 error_job.get_result(self.timeout)
File ~\anaconda3\envs\ML_241031\Lib\site-packages\joblib\parallel.py:745, in BatchCompletionCallBack.get_result(self, timeout)
739 backend = self.parallel._backend
741 if backend.supports_retrieve_callback:
742 # We assume that the result has already been retrieved by the
743 # callback thread, and is stored internally. It's just waiting to
744 # be returned.
--> 745 return self._return_or_raise()
747 # For other backends, the main thread needs to run the retrieval step.
748 try:
File ~\anaconda3\envs\ML_241031\Lib\site-packages\joblib\parallel.py:763, in BatchCompletionCallBack._return_or_raise(self)
761 try:
762 if self.status == TASK_ERROR:
--> 763 raise self._result
764 return self._result
765 finally:
ValueError: Invalid parameter active1 for estimator KerasRegressor.
This issue can likely be resolved by setting this parameter in the KerasRegressor constructor:
`KerasRegressor(active1=tanh)`
Check the list of available parameters with `estimator.get_params().keys()`
Подробнее здесь: https://stackoverflow.com/questions/792 ... nn-regress
Мобильная версия