Я использую очень простую многослойную сеть LSTM. Я подключил несколько вариантов преобразования данных, а именно StandardScaler(), RobustScaler() и MinMaxScaler(). С этими скейлерами код работает без сбоев, и ошибок не возникает. Но если я использую необработанные данные, то начиная с эпохи >= 2 я получаю только NaN. Я добавил несколько вызовов print, чтобы проверить, не связано ли это с формой тензоров, пробовал разные скорости обучения, разные оптимизаторы, менял функцию активации в слоях LSTM, инициализатор ядра... ничего не помогает. Странно то, что необработанные данные проходят через тот же самый код, который формирует тензоры для преобразованных данных...
Код следующий:
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
import numpy as np
def scaling_types(batched_regressor: list, scale_target, scaling_option='identity', return_scaler=True):
    """Scale every window of a batch independently with the selected scaler.

    Each element along the first axis is fitted and transformed on its own,
    so a batch of 32 windows triggers 32 separate fits.

    Args:
        batched_regressor: ``[input_windows, input_targets]`` (list or tuple),
            a one-element sequence ``[input_windows]``, or the
            ``input_windows`` array itself. Arrays with ``ndim == 2`` get a
            trailing axis of length 1 appended; arrays with ``ndim == 3`` are
            used unchanged.
        scale_target: When truthy, and a scaler is selected and targets were
            supplied, windows and targets are concatenated along axis 1,
            scaled together and split again. The target values then take part
            in the fitted statistics (potential leakage). When falsy only the
            windows are scaled and the targets pass through unscaled.
        scaling_option: One of
            - 'identity': no scaling
            - 'standard': StandardScaler
            - 'minmax': MinMaxScaler
            - 'robust': RobustScaler
        return_scaler: When truthy the scaler object is appended to the
            result. It is re-fitted for every window, so after the call it
            only carries the parameters of the last window; it is ``None``
            for 'identity'.

    Returns:
        ``(input_windows, input_targets[, scaler])`` when targets were
        supplied, otherwise ``input_windows`` or ``(input_windows, scaler)``.

    Raises:
        ValueError: If ``scaling_option`` is not one of the supported names,
            or if windows and targets are both given and either of them is
            neither 2-D nor 3-D.
    """
    # Plain if/elif keeps the scaler classes from being looked up unless the
    # matching option is actually requested.
    if scaling_option == 'standard':
        scaler = StandardScaler()
    elif scaling_option == 'minmax':
        scaler = MinMaxScaler()
    elif scaling_option == 'robust':
        scaler = RobustScaler()
    elif scaling_option == 'identity':
        scaler = None
    else:
        raise ValueError(
            f"Unknown scaling_option {scaling_option!r}; expected one of "
            "'identity', 'standard', 'minmax', 'robust'"
        )

    # Work out whether targets were passed along with the windows.
    if isinstance(batched_regressor, (list, tuple)):
        input_windows = batched_regressor[0]
        input_targets = batched_regressor[1] if len(batched_regressor) > 1 else None
    else:
        input_windows = batched_regressor
        input_targets = None

    if input_windows is not None and input_targets is not None:
        if input_windows.ndim not in (2, 3) or input_targets.ndim not in (2, 3):
            raise ValueError(f"Unexpected input shapes: input_windows={input_windows.shape}, input_targets={input_targets.shape}")
        input_windows = _append_feature_axis(input_windows)
        input_targets = _append_feature_axis(input_targets)

        if scaler is None:
            input_windows = np.array(input_windows)
            input_targets = np.array(input_targets)
        elif scale_target:
            # Remember the split point before input_windows is reassigned.
            window_length = input_windows.shape[1]
            combined_data = np.concatenate([input_windows, input_targets], axis=1)
            scaled_combined = _scale_each_window(scaler, combined_data)
            input_windows = scaled_combined[:, :window_length, :]
            input_targets = scaled_combined[:, window_length:, :]
        else:
            input_windows = _scale_each_window(scaler, input_windows)
    elif input_targets is None:
        input_windows = _append_feature_axis(input_windows)
        if scaler is not None:
            input_windows = _scale_each_window(scaler, input_windows)

    if input_targets is None:
        return (input_windows, scaler) if return_scaler else input_windows
    if return_scaler:
        return input_windows, input_targets, scaler
    return input_windows, input_targets


def _append_feature_axis(array):
    """Return a 2-D array with a trailing length-1 axis; anything else unchanged."""
    return np.expand_dims(array, -1) if array.ndim == 2 else array


def _scale_each_window(scaler, windows):
    """Fit ``scaler`` on each window separately and stack the transformed windows."""
    return np.array([scaler.fit_transform(window) for window in windows])
Следующий код — мой цикл обучения; в нём используется пользовательская модель LSTM (LSTM_Model, импортируется из HP_Tunning_LSTM_models.LSTM_model_class), которая на данный момент имеет очень простую архитектуру:
from functions_utils import Sliding_window_setup
from functions_utils.Sliding_window_setup import create_sliding_window_overlapping
import tensorflow as tf
import numpy as np
from keras.losses import MeanAbsolutePercentageError, MeanSquaredError
from keras.models import save_model, load_model
import random
from HP_Tunning_LSTM_models.LSTM_model_class import LSTM_Model
import pandas as pd
from functions_utils.scaling_transforms import scaling_types
import os
# Seed NumPy, TensorFlow and the stdlib `random` module with one shared value
# so that repeated runs are reproducible.
seed = 42
for _seed_rng in (np.random.seed, tf.random.set_seed, random.seed):
    _seed_rng(seed)
def Recursive_LSTM_function_train(all_data: pd.DataFrame,
                                  sliding_window_setup: Sliding_window_setup,
                                  window_size: int,
                                  batch_size: int,
                                  epochs: int,
                                  optimizer,
                                  learning_rate: float,
                                  scaling_option: str,
                                  scale_target,
                                  trained_model=None):
    """Train an LSTM model with a manual training loop and save it afterwards.

    Args:
        all_data: Source data; the name of its first column is used in the
            saved model's file name.
        sliding_window_setup: Callable invoked as
            ``sliding_window_setup(all_data, window_size, steps_out=1)`` that
            returns ``(train_regressors, train_target, test_regressors,
            test_target)``. Only the training arrays are used here.
        window_size: Forwarded to ``sliding_window_setup``.
        batch_size: Batch size of the training dataset; incomplete final
            batches are dropped.
        epochs: Number of passes over the training dataset.
        optimizer: Optimizer instance; its ``learning_rate`` is overwritten
            with ``learning_rate``.
        learning_rate: Learning rate assigned to ``optimizer``.
        scaling_option: Forwarded to ``scaling_types`` for every batch.
        scale_target: Forwarded to ``scaling_types`` for every batch.
        trained_model: Model to train. When ``None`` a fresh
            ``LSTM_Model(256, 256, 256, 256, steps_out=1)`` is created for
            this call, so separate calls never share weights by accident.

    Returns:
        ``(trained_model, model_path)`` where ``model_path`` is the file the
        model was saved to.
    """
    if trained_model is None:
        # Built per call: a model instance used as a default argument would be
        # created once at definition time and reused by every later call.
        trained_model = LSTM_Model(256, 256, 256, 256, steps_out=1)

    train_regressors, train_target, _, _ = sliding_window_setup(all_data, window_size, steps_out=1)
    # Original author's note: train_regressors.shape = (n_examples, window_size)
    # while the LSTM input shape is (batch_size, window_size, n_features);
    # here n_features = 1, hence the extra trailing axis.
    train_regressors = np.expand_dims(train_regressors, -1)
    train_target = np.expand_dims(train_target, -1)

    # A relatively wide shuffle buffer to randomise the training examples;
    # never below 1 so that very small datasets still shuffle.
    buffer_size = max(1, train_regressors.shape[0] // 2)
    train_dataset = tf.data.Dataset.from_tensor_slices((train_regressors, train_target))
    train_dataset = train_dataset.shuffle(buffer_size, seed=seed).batch(batch_size, drop_remainder=True)

    optimizer.learning_rate = learning_rate
    train_loss = MeanSquaredError()

    for epoch in range(epochs):
        # tf.keras.metrics.Mean accumulates the batch losses instead of
        # replacing the previous one, so at the end of the epoch it holds the
        # epoch's mean loss.
        epoch_loss_avg = tf.keras.metrics.Mean()

        for batch_regressors, batch_target in train_dataset:
            # Every window inside the batch is scaled independently; whether
            # the target is scaled too is decided by scale_target.
            batch_regressors_scaled, batch_target_scaled = scaling_types(
                [batch_regressors, batch_target],
                scaling_option=scaling_option,
                scale_target=scale_target,
                return_scaler=False,
            )
            # The loss must be taken against the target in the same scale the
            # model is asked to predict. It is turned back into a tensor so
            # the loss receives the same kind of object as the raw batch.
            batch_target_scaled = tf.convert_to_tensor(batch_target_scaled)

            with tf.GradientTape() as tape:
                y_pred = trained_model(batch_regressors_scaled, training=True)
                loss = train_loss(batch_target_scaled, y_pred)
            # NOTE(review): with scaling_option='identity' raw magnitudes go
            # straight into the MSE loss; if the loss turns NaN, gradient
            # clipping or a smaller learning rate are the usual first checks
            # (not verified here).
            gradients = tape.gradient(loss, trained_model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, trained_model.trainable_variables))
            epoch_loss_avg(loss)

        print(f'Epoch: {epoch} ---------- Loss: {epoch_loss_avg.result().numpy()}')
        print('\n')

    # Save the trained model at the end of training under a name derived from
    # the hyper-parameters.
    model_dir = 'Optuna_LSTM_grid_search_results_1_steps_ahead/trained_models'
    os.makedirs(model_dir, exist_ok=True)
    model_path = os.path.join(model_dir, f'model_{learning_rate}_{scaling_option}_for_{all_data.columns[0]}.h5')
    # NOTE(review): LSTM_Model is defined in another module; if it is a
    # subclassed keras Model, Keras may refuse the HDF5 ('.h5') format —
    # confirm, and switch the extension if so.
    save_model(trained_model, model_path)
    return trained_model, model_path
Я использую очень простую многослойную сеть LSTM. Я подключил несколько вариантов преобразования данных, а именно StandardScaler(), RobustScaler() и MinMaxScaler(). С этими скейлерами код работает без сбоев, и ошибок не возникает. Но если я использую необработанные данные, то начиная с эпохи >= 2 я получаю только NaN. Я добавил несколько вызовов print, чтобы проверить, не связано ли это с формой тензоров, пробовал разные скорости обучения, разные оптимизаторы, менял функцию активации в слоях LSTM, инициализатор ядра... ничего не помогает. Странно то, что необработанные данные проходят через тот же самый код, который формирует тензоры для преобразованных данных... Код следующий: [code]from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler import numpy as np def scaling_types(batched_regressor: list, scale_target, scaling_option='identity', return_scaler=True):
""" This function computed several transformations in the train_regressors, independently, for each batch, also independtly. If a batch_size=32, then 32 different scallings will be done. Include_target should be set = False, because it could introduce leakage if several steps_out are predicted. Even for just 1 step_out that could be problem, but not as serious Args: batched_regressor = [input_window, input_target] input_window: Input feature data (e.g., shape: [batch_size, window_size, n_features]). If input_window.shape=(batch_size, window_size) it is transformed in input_window.shape=(batch_size, window_size, 1) input_target: Target values (e.g., shape: [batch_size, steps_out]) If input_target.shape=(batch_size, window_size) it is transformed in input_target.shape=(batch_size, window_size, 1) scaling_option: Scaling method to apply. Choose from the following options: - 'identity': No scaling - 'standard': StandardScaler (zero mean, unit variance) - 'minmax': MinMaxScaler (scales between 0 and 1) - 'robust': RobustScaler (scales with median and IQR) scale_target: Whether to scale the target values (default: False) return_scaler: To return the scaling parameters to perform inverse transform
Returns: input_window_scaled, input_target_scaled: Scaled versions of input_window and input_target with shape (batch_size, n_steps_out, 1). If no input_target is provided, it just return the transformed input_window scaler: parameters for inverse transformation, if needed """
# Initialize scaler based on the selected option if scaling_option == 'standard': scaler = StandardScaler() elif scaling_option == 'minmax': scaler = MinMaxScaler() elif scaling_option == 'robust': scaler = RobustScaler() elif scaling_option == 'identity': scaler = None # 'identity' - no scaling
# Check how many arrays are in the input if isinstance(batched_regressor, list): input_windows = batched_regressor[0] input_targets = batched_regressor[1] if len(batched_regressor) > 1 else None else: input_windows = batched_regressor input_targets = None
if input_windows is not None and input_targets is not None: if input_windows.ndim == 2 and input_targets.ndim == 2: print('Transformação certa') input_windows = np.expand_dims(input_windows, -1) input_targets = np.expand_dims(input_targets, -1) elif input_windows.ndim == 3 and input_targets.ndim == 3: input_windows = input_windows input_targets = input_targets elif input_windows.ndim == 2 and input_targets.ndim == 3: input_windows = np.expand_dims(input_windows, -1) input_targets = input_targets elif input_windows.ndim == 3 and input_targets.ndim == 2: input_windows = input_windows input_targets = np.expand_dims(input_targets, -1) else: print('Check input_window and input_target shapes:') raise ValueError(f"Unexpected input shapes: input_windows={input_windows.shape}, input_targets={input_targets.shape}")
if scaler: print('entramos no scaler que implica input_targets different de None e que não estamos no identity') if scale_target: print('entramos no combined data') # Concatenate the current regressor and target arrays (potential leakage?) combined_data = np.concatenate([input_windows, input_targets], axis=1)
if scaler != StandardScaler(): # Fit and transform the combined data print('entramos no scaler certo data') scaled_combined = np.array([scaler.fit_transform(regressor_train_window) for regressor_train_window in combined_data]) print('estamos a fazer o scale certo') # Split the scaled data back into regressors and targets input_windows = scaled_combined[:, :input_windows.shape[1], :] input_targets = scaled_combined[:, input_windows.shape[1]:, :] if scaler == StandardScaler(): scaled_combined = np.array([scaler.fit_transform(regressor_train_window + 1e-8) for regressor_train_window in combined_data]) print('estamos a usar o standard scaling') # Split the scaled data back into regressors and targets input_windows = scaled_combined[:, :input_windows.shape[1], :] input_targets = scaled_combined[:, input_windows.shape[1]:, :] else: if scaler == MinMaxScaler() or RobustScaler(): input_windows = np.array([scaler.fit_transform(regressor_train_window) for regressor_train_window in input_windows]) elif scaler == StandardScaler(): input_windows = np.array([scaler.fit_transform(regressor_train_window + 1e-8) for regressor_train_window in input_windows])
elif input_targets is None: if input_windows.ndim == 2: input_windows = np.expand_dims(input_windows, -1) elif input_windows.ndim == 3: input_windows = input_windows
if scaler: if scaler == MinMaxScaler() or RobustScaler(): # Fit and transform the combined data input_windows = np.array([scaler.fit_transform(regressor_train_window) for regressor_train_window in input_windows]) if scaler == StandardScaler(): input_windows = np.array([scaler.fit_transform(regressor_train_window + 1e-8) for regressor_train_window in input_windows]) else: input_windows = input_windows
if input_targets is None: return (input_windows, scaler) if return_scaler else input_windows else: if return_scaler: return input_windows, input_targets, scaler else: return input_windows, input_targets [/code] Следующий код представляет собой пользовательскую модель LSTM, которая на данный момент представляет собой очень простую архитектуру: [code]from keras import Model from keras.layers import LSTM, Dense from keras.initializers import HeNormal, Zeros, RandomNormal from keras.layers import LeakyReLU class LSTM_Model(Model): def __init__(self, lstm_layer_1, lstm_layer_2, lstm_layer_3, lstm_layer_4, steps_out): super(LSTM_Model, self).__init__() white_noise_initializer = RandomNormal(mean=0.0, stddev=0.000005) self.lstm_layer_1 = LSTM(lstm_layer_1, activation= None, kernel_initializer=white_noise_initializer, recurrent_initializer=white_noise_initializer, return_sequences=True) self.lstm_layer_1_activation = LeakyReLU() self.lstm_layer_2 = LSTM(lstm_layer_2, activation= None, kernel_initializer=white_noise_initializer, recurrent_initializer=white_noise_initializer, return_sequences=True) self.lstm_layer_2_activation = LeakyReLU() self.lstm_layer_3 = LSTM(lstm_layer_3, activation= None, kernel_initializer=white_noise_initializer, recurrent_initializer=white_noise_initializer, return_sequences=True) self.lstm_layer_3_activation = LeakyReLU() self.lstm_layer_4 = LSTM(lstm_layer_4, activation= None, kernel_initializer=white_noise_initializer, recurrent_initializer=white_noise_initializer, return_sequences=True) self.lstm_layer_4_activation = LeakyReLU() self.dense_layer = Dense(1, kernel_initializer=white_noise_initializer) self.steps_out = steps_out
def call(self, inputs, training=True): if training==True: h = self.lstm_layer_1(inputs) h = self.lstm_layer_1_activation(h) h = self.lstm_layer_2(h) h = self.lstm_layer_2_activation(h) h = self.lstm_layer_3(h) h = self.lstm_layer_3_activation(h) h = self.lstm_layer_4(h) h = self.lstm_layer_4_activation(h) h = self.dense_layer(h) h = h[:,-self.steps_out, :] #keep only the desired output_values of the output return h [/code] мои тренировки: [code]from functions_utils import Sliding_window_setup from functions_utils.Sliding_window_setup import create_sliding_window_overlapping import tensorflow as tf import numpy as np from keras.losses import MeanAbsolutePercentageError, MeanSquaredError from keras.models import save_model, load_model import random from HP_Tunning_LSTM_models.LSTM_model_class import LSTM_Model import pandas as pd from functions_utils.scaling_transforms import scaling_types import os # Set seeds for reproducibility seed = 42 np.random.seed(seed) tf.random.set_seed(seed) random.seed(seed) def Recursive_LSTM_function_train(all_data: pd.DataFrame, sliding_window_setup: Sliding_window_setup, window_size: int, batch_size: int, epochs: int, optimizer, learning_rate: float, scaling_option: str, scale_target, trained_model = LSTM_Model(256,256,256,256, steps_out=1)):
buffer_size = train_regressors.shape[0]//2 #select a relatively wide buffer size to ensure randonmness in training examples train_dataset = tf.data.Dataset.from_tensor_slices((train_regressors, train_target)) train_dataset = train_dataset.shuffle(buffer_size, seed=seed).batch(batch_size, drop_remainder=True)
optimizer.learning_rate = learning_rate
train_loss = MeanSquaredError()
for epoch in range(epochs):
#calcular metric para a batch. Este tf.keras.metrics.Mean() ACUMULA as losses em cada # batch (em vez de subsituir a loss do batch anterior pela loss do batch present). # Assim no final de cada epoch e a Loss Média de cada epoch epoch_loss_avg = tf.keras.metrics.Mean()
# Training loop for batch, (train_regressors, train_target) in enumerate(train_dataset): print (f'batch_numer: {batch}') #print (f'train_regressors: {train_regressors}') #print (f'train_target: {train_target}') print (f'train_regressors.shape = {train_regressors.shape}') print (f'train_target.shape: {train_target.shape}')
#Scaling each train_regressor window independently inside each batch. Target is not being scaled, although it is possible. Se why it is not the function that defines the scaling train_regressors_scaled, train_target_scaled = scaling_types([train_regressors, train_target], scaling_option=scaling_option, scale_target=scale_target, return_scaler=False) with tf.GradientTape() as tape: y_pred = trained_model(train_regressors_scaled, training=True) print (f'y_pred.shape: {y_pred.shape}') print (f'y_pred: {y_pred}') loss = train_loss(train_target, y_pred) gradients = tape.gradient(loss, trained_model.trainable_variables) optimizer.apply_gradients(zip(gradients, trained_model.trainable_variables)) epoch_loss_avg(loss)
# Save the trained model at the end of training model_path = os.join('Optuna_LSTM_grid_search_results_1_steps_ahead/trained_models',f'model_{learning_rate}_{scaling_option}_for_{all_data.columns[0]}.h5') # Unique name based on HPs save_model(trained_model, model_path)
return trained_model, model_path [/code] и, наконец, где вызывается весь код: [code]from ast import Dict, List import tensorflow as tf from keras.losses import MeanAbsolutePercentageError, MeanSquaredError from keras.models import save_model, load_model from functions_utils import Sliding_window_setup from functions_utils.Sliding_window_setup import create_sliding_window_overlapping from keras.optimizers.legacy import Adam, SGD import pandas as pd import numpy as np import random, copy from functions_utils.Pick_M import pick_M from functions_utils.scaling_transforms import scaling_types from HP_Tunning_LSTM_models.LSTM_model_class import LSTM_Model from HP_Tunning_LSTM_models.LSTM_training_and_model_saving_loop import Recursive_LSTM_function_train from HP_Tunning_LSTM_models.y_hat_dataframes import inference_and_prediction_interval_calculation from HP_Tunning_LSTM_models.losses_df import errors_calculation_test_set from HP_Tunning_LSTM_models.train_test_split import train_test_split from typing import List import optuna from optuna.samplers import BruteForceSampler from datetime import datetime