Anomaly detection using a CNN autoencoder with Gramian Angular Field image encoding for time-series data (Python)


Post by Anonymous »

Here is a somewhat interesting question. I have a CNN autoencoder model built with TensorFlow, and I am trying to detect anomalies using kernel density estimation (KDE) together with the reconstruction error. This works great on one dataset I use (90% detection rate and a 0.6% false-positive rate), but on the dataset that matters most to me it flags everything as an anomaly. I need help understanding what is wrong with my model for this dataset. The model is not overfitting: val_loss never starts to increase, and the loss keeps decreasing.
Below are the values for the normal and anomalous data. [0] = density mean, [1] = density std, [2] = reconstruction-error mean, [3] = reconstruction-error std:
normal: (2822.07084922319, 1.3207919128364734e-10, 0.004408350901940139, 0.0006830073397453419)
anomalous: (-3017.546708702692, 4148.292151674365, 0.004043867258587852, 0.0015991246296969419)
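For reference, here is a minimal sketch of how these four statistics could be computed over a batch of images. It relies on the model, encoder_model, kde, and out_vector_shape defined in the script below; the helper name calc_density_and_recon_error is my own, not part of the original code:

import numpy as np

def calc_density_and_recon_error(batch_images):
    # For each image: KDE log-density of its flattened latent vector, and
    # the autoencoder reconstruction error (MSE between input and output).
    density_list, recon_error_list = [], []
    for im in batch_images:
        img = im[np.newaxis, :, :, :]
        encoded = encoder_model.predict(img)
        encoded = [np.reshape(e, (out_vector_shape,)) for e in encoded]
        density_list.append(kde.score_samples(encoded)[0])
        reconstruction = model.predict(img)
        recon_error_list.append(float(np.mean((reconstruction - img) ** 2)))
    # Returns (density mean, density std, recon-error mean, recon-error std)
    return (np.mean(density_list), np.std(density_list),
            np.mean(recon_error_list), np.std(recon_error_list))

# e.g. uninfected_values = calc_density_and_recon_error(normal_batch)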
I am using generated Gramian Angular Fields. Below I attach the CSV columns, a sample row, details about it, the autoencoder model, and the GAF generation script:
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.neighbors import KernelDensity  # used for the KDE on latent vectors
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import random
import glob

# Check if GPU is available
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    print("Num GPUs Available: ", len(physical_devices))
    # Optionally set memory growth to avoid memory allocation issues
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)
else:
    print("No GPU available, using CPU.")

# Define constants
MODEL_PATH = 'saved_autoencoder_model_falling.h5'
SIZE1 = 128
SIZE2 = 128 #stacked
batch_size = 64

# Define image generators for training, validation, and anomaly detection
datagen = ImageDataGenerator(rescale=1./255)
'''datagen = ImageDataGenerator(
    rescale=1./255,
    preprocessing_function=lambda x: x[:, :, :3]  # Ensure only 3 channels are used
)'''

train_generator = datagen.flow_from_directory(
    'GAF_IMG/TRAIN/',
    target_size=(SIZE1, SIZE2),
    batch_size=batch_size,
    class_mode='input'
)

validation_generator = datagen.flow_from_directory(
    'GAF_IMG/TEST/',
    target_size=(SIZE1, SIZE2),
    batch_size=batch_size,
    class_mode='input'
)

anomaly_generator = datagen.flow_from_directory(
    'GAF_IMG/ANOMALY/',
    target_size=(SIZE1, SIZE2),
    batch_size=batch_size,
    class_mode='input'
)

# Function to build the autoencoder model
def build_autoencoder():
    model = Sequential()
    # Encoder
    model.add(Conv2D(64, (3, 3), activation='relu', padding='same', input_shape=(SIZE1, SIZE2, 3)))
    model.add(MaxPooling2D((2, 2), padding='same'))
    model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D((2, 2), padding='same'))
    model.add(Conv2D(16, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D((2, 2), padding='same'))

    # Decoder
    model.add(Conv2D(16, (3, 3), activation='relu', padding='same'))
    model.add(UpSampling2D((2, 2)))
    model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
    model.add(UpSampling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(UpSampling2D((2, 2)))
    model.add(Conv2D(3, (3, 3), activation='sigmoid', padding='same'))

    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse'])
    return model

# Check if the model is already saved, otherwise train a new one
if os.path.exists(MODEL_PATH):
    print("Loading saved model...")
    model = load_model(MODEL_PATH)
else:
    print("Training new model...")
    model = build_autoencoder()
    history = model.fit(
        train_generator,
        steps_per_epoch=500 // batch_size,
        epochs=1000,
        validation_data=validation_generator,
        validation_steps=75 // batch_size,
        shuffle=True
    )
    # Save the model after training
    model.save(MODEL_PATH)
    print(f"Model saved to {MODEL_PATH}")

# Display model summary
model.summary()

# The rest of your code continues...

# Get a batch of images to test the model
data_batch = []
img_num = 0
while img_num <= train_generator.batch_index:
    data = train_generator.next()
    data_batch.append(data[0])
    img_num = img_num + 1

# Encoder-only model: the first six layers (the encoder half) of the trained autoencoder
encoder_model = Sequential(model.layers[:6])
out_vector_shape = int(np.prod(encoder_model.output_shape[1:]))

# Fit a Gaussian KDE on the flattened latent vectors of the (normal) training images
encoded_images = encoder_model.predict(train_generator)
encoded_vectors = [np.reshape(im, (out_vector_shape,)) for im in encoded_images]
kde = KernelDensity(kernel='gaussian', bandwidth=0.2).fit(encoded_vectors)

# uninfected_values holds (density mean, density std, recon-error mean,
# recon-error std) computed over the normal data, i.e. the four statistics
# listed above

def check_anomaly(img_path):
    img = np.array(Image.open(img_path).resize((SIZE1, SIZE2))) / 255.0
    img = img[np.newaxis, :, :, :3]

    encoded_img = encoder_model.predict(img)
    encoded_img = [np.reshape(im, (out_vector_shape,)) for im in encoded_img]
    density = kde.score_samples(encoded_img)[0]
    reconstruction = model.predict(img)
    recon_error = model.evaluate(reconstruction, img, batch_size=1)[0]

    if (
        density < uninfected_values[0] - 1 * uninfected_values[1]
        or density > uninfected_values[0] + 1 * uninfected_values[1]
        or recon_error > uninfected_values[2] + 1 * uninfected_values[3]
    ):
        return True  # Image is an anomaly
    else:
        return False

'''def check_anomaly(img_path):
    try:
        # Open and resize the image with updated resampling
        img = Image.open(img_path).resize((128, 128), Image.Resampling.LANCZOS)
    except PermissionError as e:
        print(f"Permission denied: {e}")
        return False  # Return False for permission errors
    except Exception as e:
        print(f"Error opening image: {e}")
        return False  # Return False for other errors
    img = np.array(img)
    print(f"Image shape: {img.shape}")  # Add this line to debug

    img = np.array(img) / 255.0
    img = img[np.newaxis, :, :, :]

    encoded_img = encoder_model.predict(img)
    encoded_img = [np.reshape(img, (out_vector_shape,)) for img in encoded_img]
    density = kde.score_samples(encoded_img)[0]
    reconstruction = model.predict(img)
    recon_error = model.evaluate(reconstruction, img, batch_size=1)[0]

    if (
        density < uninfected_values[0] - 3 * uninfected_values[1]
        or density > uninfected_values[0] + 3 * uninfected_values[1]
        or recon_error > uninfected_values[2] + 3 * uninfected_values[3]
    ):
        return True  # Image is an anomaly
    else:
        return False  # Image is NOT an anomaly
'''
# Test the anomaly detection on all anomalous images
anom_image_paths = glob.glob('GAF_IMG/ANOMALY/images/*')
normal_file_paths = glob.glob('GAF_IMG/TEST/images/*')
correctly_identified = 0
false_id = 0
for img_path in anom_image_paths:
    if check_anomaly(img_path):  # If it returns True, an anomaly was correctly identified
        correctly_identified += 1

# Calculate and print the percentage of correctly identified anomalies
total_anomalies = len(anom_image_paths)

for img_path in normal_file_paths:
    if check_anomaly(img_path):  # If it returns True, an anomaly was incorrectly identified
        false_id += 1

# Calculate and print the percentage of falsely identified anomalies (false positives)
total_normal = len(normal_file_paths)

if total_normal > 0:
    accuracy_percentage_normal = (false_id / total_normal) * 100
    print(f"Falsely identified anomalies: {false_id}/{total_normal} ({accuracy_percentage_normal:.2f}%)")
else:
    print("No normal images found for testing.")

if total_anomalies > 0:
    accuracy_percentage = (correctly_identified / total_anomalies) * 100
    print(f"Correctly identified anomalies: {correctly_identified}/{total_anomalies} ({accuracy_percentage:.2f}%)")
else:
    print("No anomalous images found for testing.")

Data in CSV format:
x,y,z,010-000-024-033,010-000-030-096,020-000-032-221,020-000-033-111,anomaly
16.13363583394,18.03632157313945,11.785301895672383,0.0,0.0,1.0,0.0,0.0
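For illustration, a file of such rows could be loaded and reduced to the x, y, z coordinates like this (a minimal sketch; the file name sample.csv is a placeholder):

import pandas as pd

# Keep only the x, y, z coordinates; drop the four sensor-tag indicator
# columns and the anomaly label (mirroring the GAF script below)
df = pd.read_csv("sample.csv")  # placeholder path
tag_columns = ['010-000-024-033', '010-000-030-096', '020-000-032-221', '020-000-033-111']
coords = df.drop(columns=tag_columns + ['anomaly'])
labels = df['anomaly']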

GAF generation script:
# Import necessary libraries
import os
from pyts.image import GramianAngularField
import numpy as np
import pandas as pd
import glob
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from PIL import Image
from matplotlib import cm
# Load data
train_files = glob.glob("Falling-Dataset/data/train/*.csv")
test_files = glob.glob("Falling-Dataset/data/test/*.csv")

train_data = pd.concat([pd.read_csv(f) for f in train_files], ignore_index=True)
test_data = pd.concat([pd.read_csv(f) for f in test_files], ignore_index=True)

# Filter normal and anomalous events
train_data_normal = train_data[train_data['anomaly'] == 0]
train_data_anomaly = train_data[train_data['anomaly'] == 1]

# Split normal events into training and validation sets
train_data, val_data = train_test_split(train_data_normal, test_size=0.1, random_state=42)
train_data = train_data.drop('anomaly', axis=1)
val_data = val_data.drop('anomaly', axis=1)
train_data_anomaly = train_data_anomaly.drop('anomaly', axis=1)

# Drop non-coordinate columns
columns_to_drop = ['010-000-024-033', '010-000-030-096', '020-000-032-221', '020-000-033-111']
train_data_coord = train_data.drop(columns=columns_to_drop)
val_data_coord = val_data.drop(columns=columns_to_drop)
train_data_anomaly_coord = train_data_anomaly.drop(columns=columns_to_drop)

# Normalize the data between -1 and 1
scaler = MinMaxScaler(feature_range=(-1, 1))
train_data_coord = pd.DataFrame(scaler.fit_transform(train_data_coord), columns=train_data_coord.columns)
val_data_coord = pd.DataFrame(scaler.transform(val_data_coord), columns=val_data_coord.columns)
train_data_anomaly_coord = pd.DataFrame(scaler.transform(train_data_anomaly_coord), columns=train_data_anomaly_coord.columns)

# Set up the GAF transformer and parameters
gaf = GramianAngularField(method='summation')
timeStep = 14 # Set the time window to 14 timesteps

# Function to create GAF images for each 14-timestep window, stack them, and apply a color filter
def create_gaf_images_with_color(data, timestep, path, image_prefix, max_images=None):
    os.makedirs(path, exist_ok=True)
    images = []

    # Loop over the data in groups of 3 consecutive windows of 'timestep'
    # samples; each group consumes 3 * timestep rows, so stop early enough
    # to avoid running past the end of the data
    for i in range(0, len(data) - 3 * timestep + 1, timestep):
        # Initialize lists to hold GAF images for x, y, and z
        gaf_x_list = []
        gaf_y_list = []
        gaf_z_list = []

        # Take 3 consecutive windows for each variable
        for j in range(3):
            window_x = data.iloc[i + j * timestep:i + (j + 1) * timestep, 0].to_numpy().reshape(1, -1)
            window_y = data.iloc[i + j * timestep:i + (j + 1) * timestep, 1].to_numpy().reshape(1, -1)
            window_z = data.iloc[i + j * timestep:i + (j + 1) * timestep, 2].to_numpy().reshape(1, -1)

            # Generate GAF images for each variable
            gaf_x = gaf.fit_transform(window_x)[0]
            gaf_y = gaf.fit_transform(window_y)[0]
            gaf_z = gaf.fit_transform(window_z)[0]

            # Append GAF images to their respective lists
            gaf_x_list.append(gaf_x)
            gaf_y_list.append(gaf_y)
            gaf_z_list.append(gaf_z)

        # Stack the 3 GAF images vertically for each variable
        stacked_gaf_x = np.concatenate(gaf_x_list, axis=0)  # Shape (3 * timestep, timestep), i.e. (42, 14)
        stacked_gaf_y = np.concatenate(gaf_y_list, axis=0)
        stacked_gaf_z = np.concatenate(gaf_z_list, axis=0)

        # Stack the three variables horizontally
        combined_gaf_image = np.concatenate([stacked_gaf_x, stacked_gaf_y, stacked_gaf_z], axis=1)  # Shape (42, 42)
        # Clip to [0, 1]; note that GAF values lie in [-1, 1], so negatives are mapped to 0
        combined_gaf_image = np.clip(combined_gaf_image, 0, 1)

        # Resizing to 128x128
        combined_gaf_image_resized = np.array(Image.fromarray((combined_gaf_image * 255).astype(np.uint8)).resize((128, 128)))

        # Apply colormap to create an RGB image
        colored_image = cm.viridis(combined_gaf_image_resized / 255.0)[:, :, :3]  # RGB channels only
        img = Image.fromarray((colored_image * 255).astype(np.uint8))  # Convert to PIL Image

        # Save the image using PIL
        img.save(f"{path}/{image_prefix}_gaf_image_{i // timestep + 1}.png")
        images.append(img)

        # Check if we've reached the maximum number of images to save
        if max_images is not None and len(images) >= max_images:
            break  # Stop if we've reached the limit

    return np.array(images)

# Create directories if they do not exist
os.makedirs('GAF_IMG/TRAIN/images', exist_ok=True)
os.makedirs('GAF_IMG/TEST/images', exist_ok=True)
os.makedirs('GAF_IMG/ANOMALY/images', exist_ok=True)

# Create GAF images for each 14-timestep window in the training, validation, and anomaly data
train_gaf_images = create_gaf_images_with_color(train_data_coord, timeStep, path='GAF_IMG/TRAIN/images', image_prefix='train', max_images=500)
val_gaf_images = create_gaf_images_with_color(val_data_coord, timeStep, path='GAF_IMG/TEST/images', image_prefix='val', max_images=100)
anom_gaf_images = create_gaf_images_with_color(train_data_anomaly_coord, timeStep, path='GAF_IMG/ANOMALY/images', image_prefix='ANOM')


More details here: https://stackoverflow.com/questions/791 ... ld-image-e