Обучение LSTM выдаёт NaN, когда batch_number >= 2 — Python

Программы на Python
Ответить
Anonymous
 Обучение LSTM выдаёт NaN, когда batch_number >= 2

Сообщение Anonymous »

Я использую очень простую многослойную сеть LSTM. Я реализовал несколько преобразований данных, а именно StandardScaler(), RobustScaler() и MinMaxScaler(). С этими масштабировщиками код работает без сбоев, и я не получаю ошибок. Но если я использую необработанные данные, то после эпохи >= 2 получаю только NaN. Я добавил несколько отладочных выводов (print), чтобы проверить, не связано ли это с формой тензоров, пробовал разные скорости обучения, разные оптимизаторы, менял функцию активации в слоях LSTM, инициализатор ядра... ничего не помогает. Странно то, что тензоры для необработанных данных я получаю тем же самым кодом, который формирует преобразованные данные...
Код следующий:

Код: Выделить всё

from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
import numpy as np
def scaling_types(batched_regressor: list, scale_target=False, scaling_option='identity', return_scaler=True):
    """Scale every window of a batch independently with a scikit-learn scaler.

    One scaler object is re-fitted on each element along the first axis, so
    every window is normalised with its own statistics only.

    Args:
        batched_regressor: ``[input_windows]`` or ``[input_windows, input_targets]``
            (list or tuple), or a bare ``input_windows`` array. 2-D arrays get a
            trailing axis of length 1 appended. When targets are supplied, both
            arrays must be 2-D or 3-D.
        scale_target: When truthy (and a scaler is selected and targets are
            supplied), windows and targets are concatenated along axis 1, scaled
            together and split back. This may leak target information into the
            regressors. Defaults to False.
        scaling_option: One of
            - 'identity': no scaling
            - 'standard': StandardScaler
            - 'minmax': MinMaxScaler
            - 'robust': RobustScaler
        return_scaler: When truthy, the scaler object (``None`` for 'identity')
            is appended to the returned tuple.

    Returns:
        Without targets: ``input_windows`` or ``(input_windows, scaler)``.
        With targets: ``(input_windows, input_targets)`` or
        ``(input_windows, input_targets, scaler)``.

        NOTE: because the same scaler object is re-fitted for every window, the
        returned scaler only retains the fit of the last window processed.

    Raises:
        ValueError: If ``scaling_option`` is not recognised, or if targets are
            supplied and either array is not 2-D or 3-D.
    """
    # Select the scaler. An unknown option used to leave `scaler` unbound and
    # crash later with an unrelated error; fail early and explicitly instead.
    if scaling_option == 'standard':
        scaler = StandardScaler()
    elif scaling_option == 'minmax':
        scaler = MinMaxScaler()
    elif scaling_option == 'robust':
        scaler = RobustScaler()
    elif scaling_option == 'identity':
        scaler = None  # 'identity' - no scaling
    else:
        raise ValueError(
            f"Unknown scaling_option {scaling_option!r}; "
            "expected 'identity', 'standard', 'minmax' or 'robust'"
        )

    # Work out how many arrays were passed in.
    if isinstance(batched_regressor, (list, tuple)):
        input_windows = batched_regressor[0]
        input_targets = batched_regressor[1] if len(batched_regressor) > 1 else None
    else:
        input_windows = batched_regressor
        input_targets = None

    # --- Windows only -------------------------------------------------------
    if input_targets is None:
        if input_windows.ndim == 2:
            input_windows = np.expand_dims(input_windows, -1)
        if scaler is not None:
            # The previous code compared `scaler` against freshly created
            # estimator instances (`scaler == StandardScaler()`), which never
            # matches, so all scalers were in fact applied the same way. That
            # effective behaviour is kept; only the dead branches are gone.
            input_windows = np.array([scaler.fit_transform(window) for window in input_windows])
        return (input_windows, scaler) if return_scaler else input_windows

    # --- Windows and targets ------------------------------------------------
    if input_windows is not None:
        if input_windows.ndim not in (2, 3) or input_targets.ndim not in (2, 3):
            print('Check input_window and input_target shapes:')
            raise ValueError(f"Unexpected input shapes: input_windows={input_windows.shape}, input_targets={input_targets.shape}")

        if input_windows.ndim == 2 and input_targets.ndim == 2:
            print('Transformação certa')
        if input_windows.ndim == 2:
            input_windows = np.expand_dims(input_windows, -1)
        if input_targets.ndim == 2:
            input_targets = np.expand_dims(input_targets, -1)

        if scaler is not None:
            print('entramos no scaler que implica input_targets different de None e que não estamos no identity')
            if scale_target:
                print('entramos no combined data')
                # Scale each window together with its target (potential leakage).
                combined_data = np.concatenate([input_windows, input_targets], axis=1)
                print('entramos no scaler certo data')
                scaled_combined = np.array([scaler.fit_transform(sample) for sample in combined_data])
                print('estamos a fazer o scale certo')
                # Split the scaled data back into regressors and targets.
                n_window_steps = input_windows.shape[1]
                input_windows = scaled_combined[:, :n_window_steps, :]
                input_targets = scaled_combined[:, n_window_steps:, :]
            else:
                # Targets are left untouched when scale_target is falsy.
                input_windows = np.array([scaler.fit_transform(window) for window in input_windows])
        else:
            input_windows = np.array(input_windows)
            input_targets = np.array(input_targets)

    if return_scaler:
        return input_windows, input_targets, scaler
    return input_windows, input_targets
Следующий код — это пользовательская модель LSTM, которая на данный момент имеет очень простую архитектуру:

Код: Выделить всё

from keras import Model
from keras.layers import LSTM, Dense
from keras.initializers import HeNormal, Zeros, RandomNormal
from keras.layers import LeakyReLU
class LSTM_Model(Model):
    """Four stacked LSTM layers, each followed by LeakyReLU, and a 1-unit Dense head.

    Args:
        lstm_layer_1: Number of units of the first LSTM layer.
        lstm_layer_2: Number of units of the second LSTM layer.
        lstm_layer_3: Number of units of the third LSTM layer.
        lstm_layer_4: Number of units of the fourth LSTM layer.
        steps_out: Offset from the end of the sequence used to pick the
            returned timestep (see ``call``).
    """

    def __init__(self, lstm_layer_1, lstm_layer_2, lstm_layer_3, lstm_layer_4, steps_out):
        super().__init__()
        # All kernels start as very small Gaussian noise around zero.
        white_noise_initializer = RandomNormal(mean=0.0, stddev=0.000005)

        # NOTE(review): activation=None replaces the LSTM's default activation
        # with a linear one, so the LeakyReLU layers below are the only
        # non-linearity applied to the outputs. Presumably this makes the
        # network sensitive to the scale of unscaled inputs - confirm.
        self.lstm_layer_1 = LSTM(
            lstm_layer_1,
            activation=None,
            kernel_initializer=white_noise_initializer,
            recurrent_initializer=white_noise_initializer,
            return_sequences=True,
        )
        self.lstm_layer_1_activation = LeakyReLU()
        self.lstm_layer_2 = LSTM(
            lstm_layer_2,
            activation=None,
            kernel_initializer=white_noise_initializer,
            recurrent_initializer=white_noise_initializer,
            return_sequences=True,
        )
        self.lstm_layer_2_activation = LeakyReLU()
        self.lstm_layer_3 = LSTM(
            lstm_layer_3,
            activation=None,
            kernel_initializer=white_noise_initializer,
            recurrent_initializer=white_noise_initializer,
            return_sequences=True,
        )
        self.lstm_layer_3_activation = LeakyReLU()
        self.lstm_layer_4 = LSTM(
            lstm_layer_4,
            activation=None,
            kernel_initializer=white_noise_initializer,
            recurrent_initializer=white_noise_initializer,
            return_sequences=True,
        )
        self.lstm_layer_4_activation = LeakyReLU()
        self.dense_layer = Dense(1, kernel_initializer=white_noise_initializer)
        self.steps_out = steps_out

    def call(self, inputs, training=True):
        """Run the forward pass and return the selected timestep of the output.

        The forward pass is the same for training and inference. It used to be
        gated on ``training == True``, which left the result undefined whenever
        the model was called with ``training=False`` (e.g. at prediction time).
        """
        h = self.lstm_layer_1(inputs)
        h = self.lstm_layer_1_activation(h)
        h = self.lstm_layer_2(h)
        h = self.lstm_layer_2_activation(h)
        h = self.lstm_layer_3(h)
        h = self.lstm_layer_3_activation(h)
        h = self.lstm_layer_4(h)
        h = self.lstm_layer_4_activation(h)
        h = self.dense_layer(h)
        # Keep only the desired output value of the sequence.
        # NOTE(review): `-self.steps_out` is an index, not a slice, so exactly one
        # timestep is returned; for steps_out > 1 that is the timestep steps_out
        # from the end rather than the last steps_out values - confirm intent.
        h = h[:, -self.steps_out, :]
        return h
Мой цикл обучения:

Код: Выделить всё

from functions_utils import Sliding_window_setup
from functions_utils.Sliding_window_setup import create_sliding_window_overlapping
import tensorflow as tf
import numpy as np
from keras.losses import MeanAbsolutePercentageError, MeanSquaredError
from keras.models import save_model, load_model
import random
from HP_Tunning_LSTM_models.LSTM_model_class import LSTM_Model
import pandas as pd
from functions_utils.scaling_transforms import scaling_types
import os
# Seed NumPy, TensorFlow and the stdlib RNG with one shared value so that
# repeated runs start from the same random state.
seed = 42
for _seed_fn in (np.random.seed, tf.random.set_seed, random.seed):
    _seed_fn(seed)
def Recursive_LSTM_function_train(all_data: pd.DataFrame,
                                  sliding_window_setup: Sliding_window_setup,
                                  window_size: int,
                                  batch_size: int,
                                  epochs: int,
                                  optimizer,
                                  learning_rate: float,
                                  scaling_option: str,
                                  scale_target,
                                  trained_model=None):
    """Train an LSTM model with a custom loop and save it to disk.

    Args:
        all_data: Data handed to ``sliding_window_setup``; the name of its first
            column is used in the saved file name.
        sliding_window_setup: Callable invoked as
            ``sliding_window_setup(all_data, window_size, steps_out=1)`` that
            returns ``(train_regressors, train_target, test_regressors, test_target)``.
        window_size: Window length forwarded to ``sliding_window_setup``.
        batch_size: Batch size; incomplete final batches are dropped.
        epochs: Number of passes over the training set.
        optimizer: Optimizer instance; its ``learning_rate`` is overwritten.
        learning_rate: Learning rate assigned to ``optimizer``.
        scaling_option: Forwarded to ``scaling_types``.
        scale_target: Forwarded to ``scaling_types``; when truthy the loss is
            computed against the scaled target.
        trained_model: Model to train. When ``None`` a fresh
            ``LSTM_Model(256, 256, 256, 256, steps_out=1)`` is created for this
            call (a default instance built in the signature would be created
            once at import time and shared between calls).

    Returns:
        ``(trained_model, model_path)``.
    """
    if trained_model is None:
        trained_model = LSTM_Model(256, 256, 256, 256, steps_out=1)

    train_regressors, train_target, _test_regressors, _test_target = sliding_window_setup(all_data, window_size, steps_out=1)

    # Append a trailing axis of length 1 to both arrays.
    # presumably train_regressors is (n_examples, window_size) so the result is
    # (n_examples, window_size, n_features=1) - TODO confirm against the caller
    train_regressors = np.expand_dims(train_regressors, -1)
    train_target = np.expand_dims(train_target, -1)

    # Use a relatively wide shuffle buffer to get well-mixed training examples.
    buffer_size = train_regressors.shape[0] // 2
    train_dataset = tf.data.Dataset.from_tensor_slices((train_regressors, train_target))
    train_dataset = train_dataset.shuffle(buffer_size, seed=seed).batch(batch_size, drop_remainder=True)

    optimizer.learning_rate = learning_rate

    train_loss = MeanSquaredError()

    for epoch in range(epochs):
        # Mean() accumulates the batch losses instead of replacing them, so at
        # the end of the epoch it holds the epoch's average loss.
        epoch_loss_avg = tf.keras.metrics.Mean()

        # Distinct names for the per-batch tensors so the full training arrays
        # above are not shadowed.
        for batch, (batch_regressors, batch_target) in enumerate(train_dataset):
            print(f'batch_numer: {batch}')
            print(f'train_regressors.shape = {batch_regressors.shape}')
            print(f'train_target.shape: {batch_target.shape}')

            # Scale each window of the batch independently.
            batch_regressors_scaled, batch_target_scaled = scaling_types(
                [batch_regressors, batch_target],
                scaling_option=scaling_option,
                scale_target=scale_target,
                return_scaler=False,
            )
            # When the target was scaled together with its window, the loss must
            # be computed in that scaled space; the raw target was used before
            # regardless of scale_target.
            loss_target = batch_target_scaled if scale_target else batch_target

            with tf.GradientTape() as tape:
                y_pred = trained_model(batch_regressors_scaled, training=True)
                print(f'y_pred.shape: {y_pred.shape}')
                print(f'y_pred: {y_pred}')
                loss = train_loss(loss_target, y_pred)
            gradients = tape.gradient(loss, trained_model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, trained_model.trainable_variables))
            epoch_loss_avg(loss)

        print(f'Epoch: {epoch} ---------- Loss: {epoch_loss_avg.result().numpy()}')
        print('\n')

    # Save the trained model at the end of training, with a name unique to the
    # hyper-parameters. `os.join` does not exist; `os.path.join` is the path helper.
    model_dir = 'Optuna_LSTM_grid_search_results_1_steps_ahead/trained_models'
    os.makedirs(model_dir, exist_ok=True)
    model_path = os.path.join(model_dir, f'model_{learning_rate}_{scaling_option}_for_{all_data.columns[0]}.h5')
    # NOTE(review): subclassed Keras models may not be serialisable to the .h5
    # format - confirm that save_model accepts this model with this extension.
    save_model(trained_model, model_path)

    return trained_model, model_path
И, наконец, место, где вызывается весь код:

Код: Выделить всё

from ast import Dict, List
import tensorflow as tf
from keras.losses import MeanAbsolutePercentageError, MeanSquaredError
from keras.models import save_model, load_model
from functions_utils import Sliding_window_setup
from functions_utils.Sliding_window_setup import create_sliding_window_overlapping
from keras.optimizers.legacy import Adam, SGD
import pandas as pd
import numpy as np
import random, copy
from functions_utils.Pick_M import pick_M
from functions_utils.scaling_transforms import scaling_types
from HP_Tunning_LSTM_models.LSTM_model_class import LSTM_Model
from HP_Tunning_LSTM_models.LSTM_training_and_model_saving_loop import Recursive_LSTM_function_train
from HP_Tunning_LSTM_models.y_hat_dataframes import inference_and_prediction_interval_calculation
from HP_Tunning_LSTM_models.losses_df import errors_calculation_test_set
from HP_Tunning_LSTM_models.train_test_split import train_test_split
from typing import List
import optuna
from optuna.samplers import  BruteForceSampler
from datetime import datetime

# Seed NumPy, TensorFlow and the stdlib RNG with one shared value so that
# repeated runs start from the same random state.
seed = 42
for _seed_fn in (np.random.seed, tf.random.set_seed, random.seed):
    _seed_fn(seed)

# Train a fresh 4 x 256-unit model with the 'identity' scaling option and the
# target left unscaled.
my_LSTM, model_path = Recursive_LSTM_function_train(
    all_data=pick_M('M1'),
    sliding_window_setup=create_sliding_window_overlapping,
    window_size=30,
    batch_size=32,
    epochs=100,
    optimizer=Adam(),
    learning_rate=0.001,
    scaling_option='identity',
    scale_target=False,
    trained_model=LSTM_Model(256, 256, 256, 256, steps_out=1),
)
Если нужен дополнительный код, спрашивайте.

Подробнее здесь: https://stackoverflow.com/questions/792 ... h-number-2
Ответить

Быстрый ответ

Изменение регистра текста: 
Смайлики
:) :( :oops: :roll: :wink: :muza: :clever: :sorry: :angel: :read: *x)
Ещё смайлики…
   
К этому ответу прикреплено по крайней мере одно вложение.

Если вы не хотите добавлять вложения, оставьте поля пустыми.

Максимально разрешённый размер вложения: 15 МБ.

Вернуться в «Python»